diff -r 06a7890f802e editor.util/manifest.mf --- a/editor.util/manifest.mf Wed May 28 13:50:31 2008 +0200 +++ b/editor.util/manifest.mf Wed May 28 14:48:54 2008 +0200 @@ -1,5 +1,5 @@ Manifest-Version: 1.0 OpenIDE-Module: org.netbeans.modules.editor.util/1 OpenIDE-Module-Localizing-Bundle: org/netbeans/lib/editor/util/Bundle.properties -OpenIDE-Module-Specification-Version: 1.23 +OpenIDE-Module-Specification-Version: 1.24 AutoUpdate-Show-In-Client: false diff -r 06a7890f802e editor.util/src/org/netbeans/lib/editor/util/CharSequenceUtilities.java --- a/editor.util/src/org/netbeans/lib/editor/util/CharSequenceUtilities.java Wed May 28 13:50:31 2008 +0200 +++ b/editor.util/src/org/netbeans/lib/editor/util/CharSequenceUtilities.java Wed May 28 14:48:54 2008 +0200 @@ -516,21 +516,34 @@ /** * Ensure that the given start and end parameters are valid indices * of the given text. - * @throws IndexOutOfBoundsException if the start or end are not within bounds - * of the given text. + * @param start must be >=0 and <=end. + * @param end must be >=start and <=textLength. + * @param length total length of a charsequence. + * @throws IndexOutOfBoundsException if the start or end are not within bounds. */ - public static void checkIndexesValid(CharSequence text, int start, int end) { + public static void checkIndexesValid(int start, int end, int length) { if (start < 0) { throw new IndexOutOfBoundsException("start=" + start + " < 0"); // NOI18N } if (end < start) { throw new IndexOutOfBoundsException("end=" + end + " < start=" + start); // NOI18N } - if (end > text.length()) { - throw new IndexOutOfBoundsException("end=" + end // NOI18N - + " > text.length()=" + text.length()); // NOI18N + if (end > length) { + throw new IndexOutOfBoundsException("end=" + end + " > length()=" + length); // NOI18N } } - + + /** + * Ensure that the given start and end parameters are valid indices + * of the given text. + * @param text non-null char sequence. + * @param start must be >=0 and <=end. + * @param end must be >=start and <=text.length(). + * @throws IndexOutOfBoundsException if the start or end are not within bounds + * of the given text. + */ + public static void checkIndexesValid(CharSequence text, int start, int end) { + checkIndexesValid(start, end, text.length()); + } } diff -r 06a7890f802e editor.util/src/org/netbeans/lib/editor/util/FlyOffsetGapList.java --- a/editor.util/src/org/netbeans/lib/editor/util/FlyOffsetGapList.java Wed May 28 13:50:31 2008 +0200 +++ b/editor.util/src/org/netbeans/lib/editor/util/FlyOffsetGapList.java Wed May 28 14:48:54 2008 +0200 @@ -60,8 +60,13 @@ private int offsetGapLength = Integer.MAX_VALUE / 2; // 32 bytes public FlyOffsetGapList() { + this(10); } - + + public FlyOffsetGapList(int initialCapacity) { + super(initialCapacity); + } + /** * Get the raw offset of the given element currently stored in the list. * diff -r 06a7890f802e editor.util/src/org/netbeans/lib/editor/util/OffsetGapList.java --- a/editor.util/src/org/netbeans/lib/editor/util/OffsetGapList.java Wed May 28 13:50:31 2008 +0200 +++ b/editor.util/src/org/netbeans/lib/editor/util/OffsetGapList.java Wed May 28 14:48:54 2008 +0200 @@ -82,8 +82,13 @@ private int offsetGapLength = Integer.MAX_VALUE / 2; // 32 bytes public OffsetGapList() { + this(10); } - + + public OffsetGapList(int initialCapacity) { + super(initialCapacity); + } + /** * Get the raw offset of the given element currently stored in the list. 
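
A minimal sketch (illustrative, not part of the patch) of the new editor.util contract above: the bounds check now has a primitive (start, end, length) overload that the original CharSequence variant delegates to, so callers can validate a range without materializing a CharSequence. Class name and values below are made up.

    import org.netbeans.lib.editor.util.CharSequenceUtilities;

    public class RangeCheckSketch {
        public static void main(String[] args) {
            int length = 16; // e.g. the length of a raw char[] buffer, no CharSequence needed
            CharSequenceUtilities.checkIndexesValid(4, 8, length); // ok: 0 <= start <= end <= length
            try {
                CharSequenceUtilities.checkIndexesValid(4, 20, length);
            } catch (IndexOutOfBoundsException e) {
                // thrown with message "end=20 > length()=16"
            }
        }
    }

The same hunk also gives OffsetGapList and FlyOffsetGapList an initial-capacity constructor (defaulting to 10), so small lists such as join-token parts can start compact.
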
* diff -r 06a7890f802e editor.util/test/unit/src/org/netbeans/lib/editor/util/CharSequenceUtilitiesTest.java --- a/editor.util/test/unit/src/org/netbeans/lib/editor/util/CharSequenceUtilitiesTest.java Wed May 28 13:50:31 2008 +0200 +++ b/editor.util/test/unit/src/org/netbeans/lib/editor/util/CharSequenceUtilitiesTest.java Wed May 28 14:48:54 2008 +0200 @@ -138,6 +138,15 @@ // endsWith assertTrue(CharSequenceUtilities.endsWith(string, string.substring(CHARS_LENGTH - SUBSTR_LENGTH))); + + CharSequenceUtilities.checkIndexesValid(0, 3, 3); // start,end,length + CharSequenceUtilities.checkIndexesValid(1, 3, 3); + try { + CharSequenceUtilities.checkIndexesValid(1, 4, 3); + TestCase.fail("IndexOutOfBoundsException was expected."); + } catch (IndexOutOfBoundsException e) { + // Expected + } } public void generateChars(char[] chars) { diff -r 06a7890f802e lexer/apichanges.xml --- a/lexer/apichanges.xml Wed May 28 13:50:31 2008 +0200 +++ b/lexer/apichanges.xml Wed May 28 14:48:54 2008 +0200 @@ -113,6 +113,25 @@ + + + Joined Sections Lexing + + + + + +

+ Embeddings that request input sections to be joined before lexing + are now lexed as a single section. +
+ Token.isRemoved() was added to check whether a particular token + is still present in the token hierarchy or whether it was removed as part of a modification.

+
+ +
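
A sketch (illustrative, not part of the patch) of how a client might consume the two additions described above; the hierarchy parameter and any required locking are assumed to be handled by the caller:

    import org.netbeans.api.lexer.*;

    public final class JoinedSectionsClient {
        static void scanJoined(TokenHierarchy<?> hierarchy) {
            TokenSequence<?> ts = hierarchy.tokenSequence();
            while (ts.moveNext()) {
                // Joins all sections of the embedding if it was created with
                // joinSections=true; behaves like embedded() otherwise.
                TokenSequence<?> embedded = ts.embeddedJoined();
                if (embedded == null) {
                    continue; // current token has no embedding
                }
                while (embedded.moveNext()) {
                    Token<?> token = embedded.token();
                    if (token.partType() != PartType.COMPLETE) {
                        // The token crosses section boundaries; joinToken() yields the logical token.
                        CharSequence joinedText = token.joinToken().text();
                    }
                    // After a later modification, token.isRemoved() tells whether a cached
                    // token instance is still part of the hierarchy.
                }
            }
        }
    }
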
+ Lexer API Cleanup diff -r 06a7890f802e lexer/nbproject/project.properties --- a/lexer/nbproject/project.properties Wed May 28 13:50:31 2008 +0200 +++ b/lexer/nbproject/project.properties Wed May 28 14:48:54 2008 +0200 @@ -43,4 +43,4 @@ javadoc.arch=${basedir}/arch.xml javadoc.apichanges=${basedir}/apichanges.xml javadoc.docfiles=${basedir}/api/doc -spec.version.base=1.27.0 +spec.version.base=1.28.0 diff -r 06a7890f802e lexer/nbproject/project.xml --- a/lexer/nbproject/project.xml Wed May 28 13:50:31 2008 +0200 +++ b/lexer/nbproject/project.xml Wed May 28 14:48:54 2008 +0200 @@ -52,7 +52,7 @@ 1 - 1.15 + 1.24 diff -r 06a7890f802e lexer/src/org/netbeans/api/lexer/Token.java --- a/lexer/src/org/netbeans/api/lexer/Token.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/api/lexer/Token.java Wed May 28 14:48:54 2008 +0200 @@ -40,6 +40,8 @@ */ package org.netbeans.api.lexer; + +import java.util.List; /** * Token describes a lexical element of input text. @@ -207,12 +209,46 @@ * @return true if the token is flyweight or false otherwise. */ public abstract boolean isFlyweight(); + + /** + * Check whether this token is no longer part of the token hierarchy. + * + * @return true if the token was removed from the token hierarchy + * or false if it's still present in the hierarchy. + */ + public abstract boolean isRemoved(); /** * Check whether this token represents a complete token - * or whether it's a part of a complete token. + * or whether it's a particular part of a complete token. + *
+ * Some lexers may also use this information to express an incomplete token. + * For example, an unclosed block comment at the end of a Java source + * is represented as a BLOCK_COMMENT token id and {@link PartType#START}. + * + * @return {@link PartType#COMPLETE} for a regular token or other part types + * for particular token parts. */ public abstract PartType partType(); + + /** + * Get the complete token that is joined from multiple parts (this token is one of those parts). + * + * @return complete token or null if this token is not a part of any join token. + */ + public abstract Token<T> joinToken(); + + /** + * Get all token parts comprising this token, ordered from lowest to highest part's offset. +
+ * It's guaranteed that each token part is continuous in the input text + * (there are no gaps inside the token part's text). + *
+ * On the other hand there may be textual gaps between two adajcent token parts. + * + * @return list of token parts or null if the token is continuous. + */ + public abstract List> joinedParts(); /** * Quickly determine whether this token has any extra properties. diff -r 06a7890f802e lexer/src/org/netbeans/api/lexer/TokenHierarchyEvent.java --- a/lexer/src/org/netbeans/api/lexer/TokenHierarchyEvent.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/api/lexer/TokenHierarchyEvent.java Wed May 28 14:48:54 2008 +0200 @@ -123,7 +123,7 @@ * if this event's type is not {@link TokenHierarchyEventType#MODIFICATION}. */ public int modificationOffset() { - return info.modificationOffset(); + return info.modOffset(); } /** diff -r 06a7890f802e lexer/src/org/netbeans/api/lexer/TokenSequence.java --- a/lexer/src/org/netbeans/api/lexer/TokenSequence.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/api/lexer/TokenSequence.java Wed May 28 14:48:54 2008 +0200 @@ -42,12 +42,14 @@ package org.netbeans.api.lexer; import java.util.ConcurrentModificationException; +import org.netbeans.lib.lexer.EmbeddedTokenList; import org.netbeans.lib.lexer.EmbeddingContainer; -import org.netbeans.lib.lexer.LexerUtilsConstants; +import org.netbeans.lib.lexer.JoinTokenList; import org.netbeans.lib.lexer.SubSequenceTokenList; import org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.lib.lexer.TokenList; import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.TokenOrEmbedding; /** * Token sequence allows to iterate between tokens @@ -246,7 +248,7 @@ public int offset() { checkTokenNotNull(); if (tokenOffset == -1) { - tokenOffset = tokenList.tokenOffset(tokenIndex); + tokenOffset = tokenList.tokenOffsetByIndex(tokenIndex); } return tokenOffset; } @@ -296,17 +298,7 @@ */ public TokenSequence embedded() { checkTokenNotNull(); - return embeddedImpl(null); - } - - private TokenSequence embeddedImpl(Language embeddedLanguage) { - if (token.isFlyweight()) - return null; - TokenList embeddedTokenList = LexerUtilsConstants.embeddedTokenList( - tokenList, tokenIndex, embeddedLanguage); - return (embeddedTokenList != null) - ? new TokenSequence(embeddedTokenList) - : null; + return embeddedImpl(null, false); } /** @@ -318,7 +310,62 @@ */ public TokenSequence embedded(Language embeddedLanguage) { checkTokenNotNull(); - return embeddedImpl(embeddedLanguage); + return embeddedImpl(embeddedLanguage, false); + } + + /** + * Get embedded token sequence that possibly joins multiple embeddings + * with the same language paths (if the embeddings allow it - see + * {@link LanguageEmbedding#joinSections()}) into a single input text + * which is then lexed as a single continuous text. + *
+ * If any of the resulting tokens crosses an embedding's boundaries then the token + * is split into multiple part tokens. + *
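
For a part token produced by such a split, the parts and the joined token relate as sketched below (illustrative, not part of the patch):

    import org.netbeans.api.lexer.Token;

    final class JoinTokenParts {
        static void visitParts(Token<?> part) {
            Token<?> joined = part.joinToken(); // the logical, complete token
            if (joined == null) {
                return; // this token is not a part of any join token
            }
            for (Token<?> p : joined.joinedParts()) { // ordered by part offset
                int start = p.offset(null);   // each part is continuous in the input text
                CharSequence text = p.text(); // textual gaps may separate adjacent parts
            }
        }
    }
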
+ * If the embedding does not join sections then this method behaves + * like {@link #embedded()}. + * + * @return embedded sequence or null if no embedding exists for this token. + * The token sequence will be positioned before first token of this embedding + * or to a join token in case the first token of this embedding is part of the join token. + */ + public TokenSequence embeddedJoined() { + checkTokenNotNull(); + return embeddedImpl(null, true); + } + + /** + * Get embedded token sequence if the token + * to which this token sequence is currently positioned + * has a language embedding. + * + * @throws IllegalStateException if {@link #token()} returns null. + */ + public TokenSequence embeddedJoined(Language embeddedLanguage) { + checkTokenNotNull(); + return embeddedImpl(embeddedLanguage, true); + } + + private TokenSequence embeddedImpl(Language embeddedLanguage, boolean joined) { + if (token.isFlyweight()) + return null; + + EmbeddedTokenList embeddedTokenList + = EmbeddingContainer.embeddedTokenList(tokenList, tokenIndex, embeddedLanguage, true); + if (embeddedTokenList != null) { + embeddedTokenList.embeddingContainer().updateStatus(); + TokenSequence tse; + JoinTokenList joinTokenList; + if (joined && (joinTokenList = embeddedTokenList.joinTokenList()) != null) { + tse = new TokenSequence(joinTokenList); + // Position to this etl's index + tse.moveIndex(joinTokenList.activeStartJoinIndex()); + } else { // Request regular TS or no joining available + tse = new TokenSequence(embeddedTokenList); + } + return tse; + } + return null; } /** @@ -402,10 +449,10 @@ checkModCount(); if (token != null) // Token already fetched tokenIndex++; - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(tokenIndex); - if (tokenOrEmbeddingContainer != null) { - AbstractToken origToken = token; - token = LexerUtilsConstants.token(tokenOrEmbeddingContainer); + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(tokenIndex); + if (tokenOrEmbedding != null) { // Might be null if no more tokens available + AbstractToken origToken = token; + token = tokenOrEmbedding.token(); // If origToken == null then the right offset might already be pre-computed from move() if (tokenOffset != -1) { if (origToken != null) { @@ -446,9 +493,9 @@ public boolean movePrevious() { checkModCount(); if (tokenIndex > 0) { - AbstractToken origToken = token; + AbstractToken origToken = token; tokenIndex--; - token = LexerUtilsConstants.token(tokenList.tokenOrEmbeddingContainer(tokenIndex)); + token = tokenList.tokenOrEmbedding(tokenIndex).token(); if (tokenOffset != -1) { // If the token list is continuous or the original token // is flyweight (there cannot be a gap before flyweight token) @@ -501,13 +548,15 @@ public int moveIndex(int index) { checkModCount(); if (index >= 0) { - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(index); - if (tokenOrEmbeddingContainer != null) { // enough tokens - resetTokenIndex(index); - } else // Token at the requested index does not exist - leave orig. index - resetTokenIndex(tokenCount()); - } else // index < 0 - resetTokenIndex(0); + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(index); + if (tokenOrEmbedding != null) { // enough tokens + resetTokenIndex(index, -1); + } else {// Token at the requested index does not exist - leave orig. index + resetTokenIndex(tokenCount(), -1); + } + } else {// index < 0 + resetTokenIndex(0, -1); + } return index - tokenIndex; } @@ -555,7 +604,7 @@ *

* If token filtering is used there may be gaps that are not covered * by any tokens and if the offset is contained in such gap then - * the token sequence will be positioned before the token that follows the gap. + * the token sequence will be positioned before the token that precedes the gap. *
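
A short sketch (illustrative, not part of the patch) of the positioning contract documented here, assuming ts is a valid TokenSequence:

    import org.netbeans.api.lexer.TokenSequence;

    final class MoveSketch {
        static void position(TokenSequence<?> ts, int offset) {
            int diff = ts.move(offset); // offset minus the start of the token positioned before
            if (ts.moveNext()) {        // move() only positions; moveNext() selects the token
                assert ts.offset() == offset - diff;
            }
        }
    }
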

* * @param offset the offset to which the token sequence * should be moved. * @return difference between the requested offset * and the start offset of the token * before which the token sequence gets positioned. + *
+ * If positioned right after the last token then (offset - last-token-end-offset) + * is returned. * * @throws ConcurrentModificationException if this token sequence * is no longer valid because of an underlying mutable input source modification. */ public int move(int offset) { checkModCount(); - // Token count in the list may change as possibly other threads - // keep asking for tokens. Root token list impls create tokens lazily - // when asked by clients. - int tokenCount = tokenList.tokenCountCurrent(); // presently created token count - if (tokenCount == 0) { // no tokens yet -> attempt to create at least one - if (tokenList.tokenOrEmbeddingContainer(0) == null) { // really no tokens at all - // In this case the token sequence could not be positioned yet - // so no need to reset "index" or other vars - resetTokenIndex(0); - return offset; - } - // Re-get the present token count (could be created a chunk of tokens at once) - tokenCount = tokenList.tokenCountCurrent(); + int[] indexAndTokenOffset = tokenList.tokenIndex(offset); + if (indexAndTokenOffset[0] != -1) { // Valid index and token-offset + resetTokenIndex(indexAndTokenOffset[0], indexAndTokenOffset[1]); + } else { // No tokens in token list (indexAndOffset[1] == 0) + resetTokenIndex(0, -1); // Set Index to zero and offset to invalid } - - // tokenCount surely >0 - int prevTokenOffset = tokenList.tokenOffset(tokenCount - 1); - if (offset > prevTokenOffset) { // may need to create further tokens if they do not exist - // Force token list to create subsequent tokens - // Cannot subtract offset by each token's length because - // there may be gaps between tokens due to token id filter use. - int tokenLength = LexerUtilsConstants.token(tokenList, tokenCount - 1).length(); - while (offset >= prevTokenOffset + tokenLength) { // above present token - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(tokenCount); - if (tokenOrEmbeddingContainer != null) { - AbstractToken t = LexerUtilsConstants.token(tokenOrEmbeddingContainer); - if (t.isFlyweight()) { // need to use previous tokenLength - prevTokenOffset += tokenLength; - } else { // non-flyweight token - retrieve offset - prevTokenOffset = tokenList.tokenOffset(tokenCount); - } - tokenLength = t.length(); - tokenCount++; - - } else { // no more tokens => position behind last token - resetTokenIndex(tokenCount); - tokenOffset = prevTokenOffset + tokenLength; // May assign the token's offset in advance - return offset - tokenOffset; - } - } - resetTokenIndex(tokenCount - 1); - tokenOffset = prevTokenOffset; // May assign the token's offset in advance - return offset - prevTokenOffset; - } - - // The offset is within the currently recognized tokens - // Use binary search - int low = 0; - int high = tokenCount - 1; - - while (low <= high) { - int mid = (low + high) / 2; - int midStartOffset = tokenList.tokenOffset(mid); - - if (midStartOffset < offset) { - low = mid + 1; - } else if (midStartOffset > offset) { - high = mid - 1; - } else { - // Token starting exactly at offset found - resetTokenIndex(mid); - tokenOffset = midStartOffset; - return 0; // right at the token begining - } - } - - // Not found exactly and high + 1 == low => high < low - // BTW there may be gaps between tokens; if offset is in gap then position to higher token - if (high >= 0) { // could be -1 - AbstractToken t = LexerUtilsConstants.token(tokenList, high); - prevTokenOffset = tokenList.tokenOffset(high); - // If gaps allowed check whether the token at "high" contains the offset - if 
(!tokenList.isContinuous() && offset > prevTokenOffset + t.length()) { - // Offset in the gap above the "high" token - high++; - prevTokenOffset += t.length(); - } - } else { // at least one token exists => use token at index 0 - high = 0; - prevTokenOffset = tokenList.tokenOffset(0); // result may differ from 0 - } - resetTokenIndex(high); - tokenOffset = prevTokenOffset; - return offset - prevTokenOffset; + return offset - indexAndTokenOffset[1]; } /** @@ -663,7 +638,7 @@ * @see #tokenCount() */ public boolean isEmpty() { - return (tokenIndex == 0 && tokenList.tokenOrEmbeddingContainer(0) == null); + return (tokenIndex == 0 && tokenList.tokenOrEmbedding(0) == null); } /** @@ -713,8 +688,9 @@ tl = stl.delegate(); startOffset = Math.max(startOffset, stl.limitStartOffset()); endOffset = Math.min(endOffset, stl.limitEndOffset()); - } else // Regular token list + } else {// Regular token list tl = tokenList; + } return new TokenSequence(new SubSequenceTokenList(tl, startOffset, endOffset)); } @@ -733,14 +709,14 @@ @Override public String toString() { return LexerUtilsConstants.appendTokenList(null, tokenList, - tokenIndex, 0, Integer.MAX_VALUE, true, 0).toString(); + tokenIndex, 0, Integer.MAX_VALUE, true, 0, true).toString(); } - private void resetTokenIndex(int index) { + private void resetTokenIndex(int index, int offset) { // Position to the given index e.g. by move() and moveIndex() tokenIndex = index; token = null; - tokenOffset = -1; + tokenOffset = offset; } private void checkTokenNotNull() { diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/BatchTokenList.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/BatchTokenList.java Wed May 28 14:48:54 2008 +0200 @@ -0,0 +1,247 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." 
If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer; + +import java.util.ArrayList; +import java.util.Set; +import org.netbeans.api.lexer.Language; +import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.InputAttributes; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.token.TextToken; + + +/** + * Token list used for immutable inputs. + * + * @author Miloslav Metelka + * @version 1.00 + */ + +public final class BatchTokenList +extends ArrayList> implements TokenList { + + /** Flag for additional correctness checks (may degrade performance). */ + private static final boolean testing = Boolean.getBoolean("netbeans.debug.lexer.test"); + + private static boolean maintainLAState; + + /** + * Check whether lookaheads and states are stored for testing purposes. + */ + public static boolean isMaintainLAState() { + return maintainLAState; + } + + public static void setMaintainLAState(boolean maintainLAState) { + BatchTokenList.maintainLAState = maintainLAState; + } + + private final TokenHierarchyOperation tokenHierarchyOperation; + + private final CharSequence inputSourceText; + + private final LanguagePath languagePath; + + private final Set skipTokenIds; + + private final InputAttributes inputAttributes; + + /** + * Lexer input used for lexing of the input. 
+ */ + private LexerInputOperation lexerInputOperation; + + private LAState laState; + + + public BatchTokenList(TokenHierarchyOperation tokenHierarchyOperation, CharSequence inputText, + Language language, Set skipTokenIds, InputAttributes inputAttributes) { + this.tokenHierarchyOperation = tokenHierarchyOperation; + this.inputSourceText = inputText; + this.languagePath = LanguagePath.get(language); + this.skipTokenIds = skipTokenIds; + this.inputAttributes = inputAttributes; + if (testing) { // Maintain lookaheads and states when in test environment + laState = LAState.empty(); + } + this.lexerInputOperation = createLexerInputOperation(); + } + + protected LexerInputOperation createLexerInputOperation() { + return new TextLexerInputOperation(this); + } + + public TokenList rootTokenList() { + return this; // this list should always be the root list of the token hierarchy + } + + public CharSequence inputSourceText() { + return inputSourceText; + } + + public TokenHierarchyOperation tokenHierarchyOperation() { + return tokenHierarchyOperation; + } + + public LanguagePath languagePath() { + return languagePath; + } + + public synchronized int tokenCount() { + if (lexerInputOperation != null) { // still lexing + tokenOrEmbeddingImpl(Integer.MAX_VALUE); + } + return size(); + } + + public int tokenCountCurrent() { + return size(); + } + + public int tokenOffset(AbstractToken token) { + int rawOffset = token.rawOffset(); + // Children offsets should be absolute + return rawOffset; + } + + public int tokenOffsetByIndex(int index) { + AbstractToken token = existingToken(index); + int offset; + if (token.isFlyweight()) { + offset = 0; + while (--index >= 0) { + token = existingToken(index); + offset += token.length(); + if (!token.isFlyweight()) { + offset += token.offset(null); + break; + } + } + } else { // non-flyweight offset + offset = token.offset(null); + } + return offset; + } + + public int[] tokenIndex(int offset) { + return LexerUtilsConstants.tokenIndexLazyTokenCreation(this, offset); + } + + public synchronized TokenOrEmbedding tokenOrEmbedding(int index) { + return tokenOrEmbeddingImpl(index); + } + + private TokenOrEmbedding tokenOrEmbeddingImpl(int index) { + while (lexerInputOperation != null && index >= size()) { + AbstractToken token = lexerInputOperation.nextToken(); + if (token != null) { // lexer returned valid token + add(token); + if (laState != null) { // maintaining lookaheads and states + laState = laState.add(lexerInputOperation.lookahead(), + lexerInputOperation.lexerState()); + } + } else { // no more tokens from lexer + lexerInputOperation.release(); + lexerInputOperation = null; + trimToSize(); + } + } + return (index < size()) ? get(index) : null; + } + + private AbstractToken existingToken(int index) { + return get(index).token(); + } + + public int lookahead(int index) { + return (laState != null) ? laState.lookahead(index) : -1; + } + + public Object state(int index) { + return (laState != null) ? 
laState.state(index) : null; + } + + public int startOffset() { + return 0; + } + + public int endOffset() { + int cntM1 = tokenCount() - 1; + if (cntM1 >= 0) + return tokenOffsetByIndex(cntM1) + tokenOrEmbeddingImpl(cntM1).token().length(); + return 0; + } + + public boolean isRemoved() { + return false; + } + + public int modCount() { + return LexerUtilsConstants.MOD_COUNT_IMMUTABLE_INPUT; // immutable input + } + + public synchronized AbstractToken replaceFlyToken( + int index, AbstractToken flyToken, int offset) { + TextToken nonFlyToken = ((TextToken)flyToken).createCopy(this, offset); + set(index, nonFlyToken); + return nonFlyToken; + } + + public void wrapToken(int index, EmbeddingContainer embeddingContainer) { + set(index, embeddingContainer); + } + + public InputAttributes inputAttributes() { + return inputAttributes; + } + + public boolean isContinuous() { + return (skipTokenIds == null); + } + + public Set skipTokenIds() { + return skipTokenIds; + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/CharPreprocessorOperation.java --- a/lexer/src/org/netbeans/lib/lexer/CharPreprocessorOperation.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/CharPreprocessorOperation.java Wed May 28 14:48:54 2008 +0200 @@ -241,8 +241,8 @@ public void notifyError(String errorMessage) { if (lexerInputOperation != null) { int parentIndex = parent.readIndex(); // Get the - lexerInputOperation.notifyPreprocessorError( - new CharPreprocessorError(errorMessage, parent.deepRawLength(parentIndex))); +// lexerInputOperation.notifyPreprocessorError( +// new CharPreprocessorError(errorMessage, parent.deepRawLength(parentIndex))); } } @@ -288,10 +288,10 @@ return tokenLength; } - public void tokenRecognized(int tokenLength) { + public void assignTokenLength(int tokenLength, boolean skipToken) { this.tokenLength = tokenLength; // Modify tokenLength for preprocessed characters - parent.tokenRecognized(parentLength(tokenLength)); + parent.assignTokenLength(parentLength(tokenLength), skipToken); } public PreprocessedTextStorage createPreprocessedTextStorage(CharSequence rawText, @@ -390,7 +390,7 @@ * This method is called after the token has been recognized * to clear internal data related to processing of token's characters. */ - public void tokenApproved() { + public void consumeTokenLength() { if (prepStartIndex != lookaheadIndex) { // some prep chars (may be after token length) if (prepStartIndex < tokenLength) { // prep chars before token end if (prepEndIndex <= tokenLength) { // no preprocessed chars past token end @@ -417,7 +417,7 @@ readIndex -= tokenLength; lookaheadIndex -= tokenLength; - parent.tokenApproved(); + parent.consumeTokenLength(); if (testing) consistencyCheck(); diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/CharProvider.java --- a/lexer/src/org/netbeans/lib/lexer/CharProvider.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/CharProvider.java Wed May 28 14:48:54 2008 +0200 @@ -101,18 +101,19 @@ * token length in the root lexer input operation due to character * preprocessing. *
- * The tokenLength should be cached by this provider. - * @param skip whether the token will be skipped due to filtering of its id. + * The tokenLength at a particular level should be cached by the corresponding provider. + * + * @param skipToken whether the token will be skipped due to filtering of its id. * @return true if the token is preprocessed or false otherwise. */ - void tokenRecognized(int tokenLength); + void assignTokenLength(int tokenLength, boolean skipToken); /** - * Notify this provider that the token was approved and - * that the tokenLength number of characters should be skipped - * (tokenLength should be cached by the provider). + * Notify this provider that the token was created and + * that the tokenLength number of characters should be consumed + * (tokenLength should continue to be held by the provider). */ - void tokenApproved(); + void consumeTokenLength(); /** * Collect extra preprocessed characters from the parent providers. diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/EmbeddedJoinInfo.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/EmbeddedJoinInfo.java Wed May 28 14:48:54 2008 +0200 @@ -0,0 +1,122 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer; + +/** + * Class that wraps a each embedded token list contained in join token list. 
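
The "gap-preprocessed" indices kept by EmbeddedJoinInfo follow the same gap idiom as the raw offsets elsewhere in this patch: a raw value below the gap start is used as-is, otherwise the gap length is subtracted. A sketch of the translation (illustrative; JoinTokenListBase internals are not shown in this diff, so the field names are made up), mirroring the rawOffset translation visible in EmbeddedTokenList below:

    // Hypothetical translation inside JoinTokenListBase; field names are assumptions.
    int index(int rawIndex, int indexGapStart, int indexGapLength) {
        return (rawIndex < indexGapStart) ? rawIndex : rawIndex - indexGapLength;
    }
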
+ * + * @author Miloslav Metelka + */ + +public final class EmbeddedJoinInfo { + + public EmbeddedJoinInfo(JoinTokenListBase base, int rawJoinTokenIndex, int rawTokenListIndex) { + assert (base != null); + this.base = base; + this.rawJoinTokenIndex = rawJoinTokenIndex; + this.rawTokenListIndex = rawTokenListIndex; + } + + /** + * Reference to join token list base as a join-related extension + * of this ETL. + * In fact this is the only field through which the join token list base instance + * is referenced. + */ + public final JoinTokenListBase base; // 12 bytes (8-super + 4) + + /** + * Index in terms of join token list + * that corresponds to first token of wrapped ETL. + *
+ * The index must be gap-preprocessed. + */ + int rawJoinTokenIndex; // 16 bytes + + /** + * Index of related ETL in a join token list (base). + *
+ * The index must be gap-preprocessed. + */ + int rawTokenListIndex; // 20 bytes + + /** + * Number of items to go forward to reach last part of a join token. + * Zero otherwise. + */ + private int joinTokenLastPartShift; // 24 bytes + + public int joinTokenIndex() { + return base.joinTokenIndex(rawJoinTokenIndex); + } + + public void setRawJoinTokenIndex(int rawJoinTokenIndex) { + this.rawJoinTokenIndex = rawJoinTokenIndex; + } + + public int tokenListIndex() { + return base.tokenListIndex(rawTokenListIndex); + } + + public int joinTokenLastPartShift() { + return joinTokenLastPartShift; + } + + public void setJoinTokenLastPartShift(int joinTokenLastPartShift) { + this.joinTokenLastPartShift = joinTokenLastPartShift; + } + + public StringBuilder dumpInfo(StringBuilder sb) { + if (sb == null) + sb = new StringBuilder(70); + sb.append("jti=").append(joinTokenIndex()); + sb.append(", tli=").append(tokenListIndex()); + sb.append(", lps=").append(joinTokenLastPartShift()); + return sb; + } + + @Override + public String toString() { + return dumpInfo(null).toString(); + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/EmbeddedTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/EmbeddedTokenList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/EmbeddedTokenList.java Wed May 28 14:48:54 2008 +0200 @@ -47,11 +47,11 @@ import org.netbeans.lib.editor.util.FlyOffsetGapList; import org.netbeans.lib.lexer.inc.MutableTokenList; import org.netbeans.api.lexer.InputAttributes; -import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.lexer.inc.TokenListChange; import org.netbeans.spi.lexer.LanguageEmbedding; import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.token.JoinToken; import org.netbeans.lib.lexer.token.TextToken; @@ -73,7 +73,7 @@ */ public final class EmbeddedTokenList -extends FlyOffsetGapList implements MutableTokenList { +extends FlyOffsetGapList> implements MutableTokenList { /** Flag for additional correctness checks (may degrade performance). */ private static final boolean testing = Boolean.getBoolean("netbeans.debug.lexer.test"); @@ -83,7 +83,7 @@ * made but was unsuccessful. */ public static final EmbeddedTokenList NO_DEFAULT_EMBEDDING - = new EmbeddedTokenList(null, null, null, null); + = new EmbeddedTokenList(null, null, null); /** * Embedding container carries info about the token into which this @@ -114,65 +114,103 @@ */ private EmbeddedTokenList nextEmbeddedTokenList; // 52 bytes + /** + * Additional information in case this ETL is contained in a JoinTokenList. + *
+ * Through this info a reference to the JoinTokenList is held. There is no other + * indexed structure so the EmbeddedTokenList members of TokenListList + * must be binary-searched. + */ + public EmbeddedJoinInfo joinInfo; // 56 bytes + public EmbeddedTokenList(EmbeddingContainer embeddingContainer, - LanguagePath languagePath, LanguageEmbedding embedding, - EmbeddedTokenList nextEmbedding) { + LanguagePath languagePath, LanguageEmbedding embedding + ) { + super(1); // Suitable for adding join-token parts this.embeddingContainer = embeddingContainer; this.languagePath = languagePath; this.embedding = embedding; - this.nextEmbeddedTokenList = nextEmbedding; if (embeddingContainer != null) { // ec may be null for NO_DEFAULT_EMBEDDING only - laState = LAState.initState(); - embeddingContainer.updateStatusImpl(); // Ensure startOffset() is up-to-date + initLAState(); } } - private void init() { - if (embedding.joinSections()) { - // Find the token list list - it should also init this token list - root().tokenHierarchyOperation().tokenListList(languagePath); - } else { // not joining => can lex individually - init(null); - } - } - - public void init(Object relexState) { - laState = (modCount() != -1 || testing) ? LAState.empty() : null; - + public void initAllTokens() { + assert (!embedding.joinSections()); // Joined token creation must be used instead +// initLAState(); // Lex the whole input represented by token at once LexerInputOperation lexerInputOperation = createLexerInputOperation( - 0, startOffset(), relexState); + 0, startOffset(), null); AbstractToken token = lexerInputOperation.nextToken(); while (token != null) { - updateElementOffsetAdd(token); // must subtract startOffset() - add(token); - if (laState != null) { - laState = laState.add(lexerInputOperation.lookahead(), - lexerInputOperation.lexerState()); - } + addToken(token, lexerInputOperation); token = lexerInputOperation.nextToken(); } lexerInputOperation.release(); lexerInputOperation = null; + trimStorageToSize(); + } + + private void initLAState() { + this.laState = (modCount() != LexerUtilsConstants.MOD_COUNT_IMMUTABLE_INPUT || testing) + ? LAState.empty() // Will collect LAState + : null; + } + /** + * Return join token list with active token list positioned to this ETL + * or return null if this.joinInfo == null. + */ + public JoinTokenList joinTokenList() { + if (joinInfo != null) { + TokenListList tokenListList = rootTokenList().tokenHierarchyOperation().existingTokenListList(languagePath); + int etlIndex = tokenListList.findIndex(startOffset()); + int tokenListStartIndex = etlIndex - joinInfo.tokenListIndex(); + JoinTokenList jtl = new JoinTokenList(tokenListList, joinInfo.base, tokenListStartIndex); + // Position to this etl's join index + jtl.setActiveTokenListIndex(etlIndex - tokenListStartIndex); + return jtl; + } + return null; + } + + /** + * Add token without touching laState - suitable for JoinToken's handling. + * + * @param token non-null token + */ + public void addToken(AbstractToken token) { + updateElementOffsetAdd(token); // must subtract startOffset() + add(token); + } + + public void addToken(AbstractToken token, LexerInputOperation lexerInputOperation) { + addToken(token); + if (laState != null) { // maintaining lookaheads and states + // Only get LA and state when necessary (especially lexerState() may be costly) + laState = laState.add(lexerInputOperation.lookahead(), lexerInputOperation.lexerState()); + } + } + + /** + * Used when dealing with PartToken instances. 
+ */ + public void addToken(AbstractToken token, int lookahead, Object state) { + addToken(token); + if (laState != null) { // maintaining lookaheads and states + laState = laState.add(lookahead, state); + } + } + + public void trimStorageToSize() { trimToSize(); // Compact storage if (laState != null) laState.trimToSize(); } - /** - * Check whether this embedded token list is initialized. - *
- * If not then the updating process should not touch it unless - * the token list list exists for this particular language path. - */ - public boolean isInited() { - return (laState != LAState.initState()); - } - - EmbeddedTokenList nextEmbeddedTokenList() { + public EmbeddedTokenList nextEmbeddedTokenList() { return nextEmbeddedTokenList; } @@ -189,23 +227,24 @@ } public int tokenCount() { - synchronized (root()) { - if (laState == LAState.initState()) - init(); - return size(); - } + return tokenCountCurrent(); } - public Object tokenOrEmbeddingContainer(int index) { - synchronized (root()) { - if (laState == LAState.initState()) - init(); + public int tokenCountCurrent() { + return size(); + } + + public int joinTokenCount() { + int tokenCount = tokenCountCurrent(); + if (tokenCount > 0 && joinInfo.joinTokenLastPartShift() > 0) + tokenCount--; + return tokenCount; + } + + public TokenOrEmbedding tokenOrEmbedding(int index) { + synchronized (rootTokenList()) { return (index < size()) ? get(index) : null; } - } - - private Token existingToken(int index) { - return LexerUtilsConstants.token(tokenOrEmbeddingContainer(index)); } public int lookahead(int index) { @@ -223,219 +262,181 @@ * For token hierarchy snapshots the returned value is corrected * in the TokenSequence explicitly by adding TokenSequence.tokenOffsetDiff. */ - public int tokenOffset(int index) { + public int tokenOffsetByIndex(int index) { // embeddingContainer().checkStatusUpdated(); return elementOffset(index); } - public int childTokenOffset(int rawOffset) { - // Need to make sure that the startOffset is up-to-date - embeddingContainer.updateStatus(); - return childTokenOffsetNoUpdate(rawOffset); - } - - public int childTokenOffsetNoUpdate(int rawOffset) { + public int tokenOffset(AbstractToken token) { + if (token.getClass() == JoinToken.class) { + return token.offset(null); + } + int rawOffset = token.rawOffset(); // embeddingContainer().checkStatusUpdated(); - return embeddingContainer.tokenStartOffset() + embedding.startSkipLength() - + childTokenRelOffset(rawOffset); + int relOffset = (rawOffset < offsetGapStart()) + ? rawOffset + : rawOffset - offsetGapLength(); + return startOffset() + relOffset; } - /** - * Get difference between start offset of the particular child token - * against start offset of the root token. - */ - public int childTokenOffsetShift(int rawOffset) { - // Need to make sure that the startOffsetShift is up-to-date - embeddingContainer.updateStatus(); - return embeddingContainer.rootTokenOffsetShift() + childTokenRelOffset(rawOffset); - } - - /** - * Get child token's real offset which is always a relative value - * to startOffset value. - */ - private int childTokenRelOffset(int rawOffset) { - return (rawOffset < offsetGapStart()) - ? rawOffset - : rawOffset - offsetGapLength(); - } - - public char childTokenCharAt(int rawOffset, int index) { -// embeddingContainer().checkStatusUpdated(); - // Do not update the start offset shift - the token.text() - // did it before returning its result and its contract - // specifies that. - // Return chars by delegating to rootToken - return embeddingContainer.charAt( - embedding.startSkipLength() + childTokenRelOffset(rawOffset) + index); + public int[] tokenIndex(int offset) { + return LexerUtilsConstants.tokenIndexBinSearch(this, offset, tokenCountCurrent()); } public int modCount() { - // Delegate to root to have the most up-to-date value for token sequence's check. 
- // Extra synchronization should not be necessary since the TokenSequence.embedded() - // calls EmbeddingContainer.embeddedTokenList() - // which calls which contains the synchronization and calls updateStatusImpl(). + // Mod count of EC must be returned to allow custom removed embeddings to work + // - they set LexerUtilsConstants.MOD_COUNT_REMOVED as cachedModCount. return embeddingContainer.cachedModCount(); } @Override public int startOffset() { // used by FlyOffsetGapList // embeddingContainer.checkStatusUpdated(); - return embeddingContainer.tokenStartOffset() + embedding.startSkipLength(); + return embeddingContainer.branchTokenStartOffset() + embedding.startSkipLength(); } public int endOffset() { // embeddingContainer.checkStatusUpdated(); - return embeddingContainer.tokenStartOffset() + embeddingContainer.token().length() + return embeddingContainer.branchTokenStartOffset() + embeddingContainer.token().length() - embedding.endSkipLength(); } + public int textLength() { + return embeddingContainer.token().length() - embedding.startSkipLength() - embedding.endSkipLength(); + } + public boolean isRemoved() { - embeddingContainer.updateStatusImpl(); return embeddingContainer.isRemoved(); } - public TokenList root() { + public TokenList rootTokenList() { return embeddingContainer.rootTokenList(); } - + + public CharSequence inputSourceText() { + return rootTokenList().inputSourceText(); + } + public TokenHierarchyOperation tokenHierarchyOperation() { - return root().tokenHierarchyOperation(); + return rootTokenList().tokenHierarchyOperation(); } - public AbstractToken rootToken() { - return embeddingContainer.rootToken(); + protected int elementRawOffset(TokenOrEmbedding elem) { + return elem.token().rawOffset(); } - protected int elementRawOffset(Object elem) { - return (elem.getClass() == EmbeddingContainer.class) - ? 
((EmbeddingContainer)elem).token().rawOffset() - : ((AbstractToken)elem).rawOffset(); - } - - protected void setElementRawOffset(Object elem, int rawOffset) { - if (elem.getClass() == EmbeddingContainer.class) - ((EmbeddingContainer)elem).token().setRawOffset(rawOffset); - else - ((AbstractToken)elem).setRawOffset(rawOffset); + protected void setElementRawOffset(TokenOrEmbedding elem, int rawOffset) { + elem.token().setRawOffset(rawOffset); } - protected boolean isElementFlyweight(Object elem) { - // token wrapper always contains non-flyweight token - return (elem.getClass() != EmbeddingContainer.class) - && ((AbstractToken)elem).isFlyweight(); + protected boolean isElementFlyweight(TokenOrEmbedding elem) { + return elem.token().isFlyweight(); } - protected int elementLength(Object elem) { - return LexerUtilsConstants.token(elem).length(); + protected int elementLength(TokenOrEmbedding elem) { + return elem.token().length(); } public AbstractToken replaceFlyToken( int index, AbstractToken flyToken, int offset) { - synchronized (root()) { + synchronized (rootTokenList()) { TextToken nonFlyToken = ((TextToken)flyToken).createCopy(this, offset2Raw(offset)); set(index, nonFlyToken); return nonFlyToken; } } - public void wrapToken(int index, EmbeddingContainer embeddingContainer) { - synchronized (root()) { + public void wrapToken(int index, EmbeddingContainer embeddingContainer) { + synchronized (rootTokenList()) { set(index, embeddingContainer); } } public InputAttributes inputAttributes() { - return root().inputAttributes(); + return rootTokenList().inputAttributes(); } // MutableTokenList extra methods - public Object tokenOrEmbeddingContainerUnsync(int index) { + public TokenOrEmbedding tokenOrEmbeddingUnsync(int index) { return get(index); - } - - public int tokenCountCurrent() { - return size(); } public LexerInputOperation createLexerInputOperation( int tokenIndex, int relexOffset, Object relexState) { // embeddingContainer.checkStatusUpdated(); - CharSequence tokenText = embeddingContainer.token().text(); - int tokenStartOffset = embeddingContainer.tokenStartOffset(); - if (tokenText == null) { // Should not normally happen - debug the state - throw new IllegalStateException("Text of parent token is null. 
tokenStartOffset=" + tokenStartOffset + - ", tokenIndex=" + tokenIndex + ", relexOffset=" + relexOffset + ", relexState=" + relexState + - ", languagePath=" + languagePath() + ", inited=" + isInited() - ); - } - int endOffset = tokenStartOffset + tokenText.length() - - embedding.endSkipLength(); - return new TextLexerInputOperation(this, tokenIndex, relexState, tokenText, - tokenStartOffset, relexOffset, endOffset); +// AbstractToken branchToken = embeddingContainer.token(); + int endOffset = endOffset(); +// assert (!branchToken.isRemoved()) : "No lexing when token is removed"; +// assert (relexOffset >= startOffset()) : "Invalid relexOffset=" + relexOffset + " < startOffset()=" + startOffset(); + assert (relexOffset <= endOffset) : "Invalid relexOffset=" + relexOffset + " > endOffset()=" + endOffset; + return new TextLexerInputOperation(this, tokenIndex, relexState, relexOffset, endOffset); } public boolean isFullyLexed() { return true; } - public void replaceTokens(TokenListChange change, int removeTokenCount, int diffLength) { + public void replaceTokens(TokenListChange change, int diffLength) { int index = change.index(); // Remove obsolete tokens (original offsets are retained) - Object[] removedTokensOrEmbeddingContainers = new Object[removeTokenCount]; - copyElements(index, index + removeTokenCount, removedTokensOrEmbeddingContainers, 0); - int offset = change.offset(); - for (int i = 0; i < removeTokenCount; i++) { - Object tokenOrEmbeddingContainer = removedTokensOrEmbeddingContainers[i]; - AbstractToken token; - // It's necessary to update-status of all removed tokens' contained embeddings - // since otherwise (if they would not be up-to-date) they could not be updated later - // as they lose their parent token list which the update-status relies on. - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - EmbeddingContainer ec = (EmbeddingContainer)tokenOrEmbeddingContainer; - ec.updateStatusAndInvalidate(); - token = ec.token(); - } else { // Regular token - token = (AbstractToken)tokenOrEmbeddingContainer; + int removedTokenCount = change.removedTokenCount(); + int rootModCount = rootTokenList().modCount(); + AbstractToken firstRemovedToken = null; + if (removedTokenCount > 0) { + @SuppressWarnings("unchecked") + TokenOrEmbedding[] removedTokensOrEmbeddings = new TokenOrEmbedding[removedTokenCount]; + copyElements(index, index + removedTokenCount, removedTokensOrEmbeddings, 0); + firstRemovedToken = removedTokensOrEmbeddings[0].token(); + for (int i = 0; i < removedTokenCount; i++) { + TokenOrEmbedding tokenOrEmbedding = removedTokensOrEmbeddings[i]; + // It's necessary to update-status of all removed tokens' contained embeddings + // since otherwise (if they would not be up-to-date) they could not be updated later + // as they lose their parent token list which the update-status relies on. 
+ EmbeddingContainer ec = tokenOrEmbedding.embedding(); + if (ec != null) { + ec.updateStatusUnsyncAndMarkRemoved(); + assert (ec.cachedModCount() != rootModCount) : "ModCount already updated"; // NOI18N + } + AbstractToken token = tokenOrEmbedding.token(); + if (!token.isFlyweight()) { + updateElementOffsetRemove(token); + token.setTokenList(null); + } } - if (!token.isFlyweight()) { - updateElementOffsetRemove(token); - token.setTokenList(null); + remove(index, removedTokenCount); // Retain original offsets + laState.remove(index, removedTokenCount); // Remove lookaheads and states + change.setRemovedTokens(removedTokensOrEmbeddings); + } + + if (diffLength != 0) { // JoinTokenList may pass 0 to not do any offset updates + // Move and fix the gap according to the performed modification. + // Instead of modOffset the gap is located at first relexed token's start + // because then the already precomputed index corresponding to the given offset + // can be reused. Otherwise there would have to be another binary search for index. + int startOffset = startOffset(); // updateStatus() should already be called + if (offsetGapStart() != change.offset() - startOffset) { + // Minimum of the index of the first removed index and original computed index + moveOffsetGap(change.offset() - startOffset, change.index()); } - offset += token.length(); + updateOffsetGapLength(-diffLength); } - remove(index, removeTokenCount); // Retain original offsets - laState.remove(index, removeTokenCount); // Remove lookaheads and states - change.setRemovedTokens(removedTokensOrEmbeddingContainers); - change.setRemovedEndOffset(offset); - - // Move and fix the gap according to the performed modification. - int startOffset = startOffset(); // updateStatus() should already be called - if (offsetGapStart() != change.offset() - startOffset) { - // Minimum of the index of the first removed index and original computed index - moveOffsetGap(change.offset() - startOffset, Math.min(index, change.offsetGapIndex())); - } - updateOffsetGapLength(-diffLength); // Add created tokens. 
// This should be called early when all the members are true tokens - List addedTokensOrBranches = change.addedTokensOrBranches(); - if (addedTokensOrBranches != null) { - for (Object tokenOrBranch : addedTokensOrBranches) { - @SuppressWarnings("unchecked") - AbstractToken token = (AbstractToken)tokenOrBranch; - updateElementOffsetAdd(token); + List> addedTokenOrEmbeddings = change.addedTokenOrEmbeddings(); + if (addedTokenOrEmbeddings != null) { + for (TokenOrEmbedding tokenOrEmbedding : addedTokenOrEmbeddings) { + updateElementOffsetAdd(tokenOrEmbedding.token()); } - addAll(index, addedTokensOrBranches); + addAll(index, addedTokenOrEmbeddings); laState = laState.addAll(index, change.laState()); change.syncAddedTokenCount(); // Check for bounds change only - if (removeTokenCount == 1 && addedTokensOrBranches.size() == 1) { + if (removedTokenCount == 1 && addedTokenOrEmbeddings.size() == 1) { // Compare removed and added token ids and part types - AbstractToken removedToken = LexerUtilsConstants.token(removedTokensOrEmbeddingContainers[0]); AbstractToken addedToken = change.addedToken(0); - if (removedToken.id() == addedToken.id() - && removedToken.partType() == addedToken.partType() + if (firstRemovedToken.id() == addedToken.id() + && firstRemovedToken.partType() == addedToken.partType() ) { change.markBoundsChange(); } @@ -459,20 +460,27 @@ this.embeddingContainer = embeddingContainer; } - public String toStringHeader() { - StringBuilder sb = new StringBuilder(50); - sb.append("ETL: <").append(startOffset()); + public StringBuilder dumpInfo(StringBuilder sb) { + if (sb == null) { + sb = new StringBuilder(50); + } + sb.append("ETL<").append(startOffset()); sb.append(",").append(endOffset()); - sb.append(">"); - sb.append(" IHC=").append(System.identityHashCode(this)); + sb.append("> TC=").append(tokenCountCurrent()); + sb.append("(").append(joinTokenCount()).append(')'); + if (joinInfo != null) { + sb.append(" JI:"); + joinInfo.dumpInfo(sb); + } + sb.append(", IHC=").append(System.identityHashCode(this)); sb.append('\n'); - return sb.toString(); + return sb; } @Override public String toString() { StringBuilder sb = new StringBuilder(256); - sb.append(toStringHeader()); + dumpInfo(sb); LexerUtilsConstants.appendTokenList(sb, this); return sb.toString(); } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/EmbeddingContainer.java --- a/lexer/src/org/netbeans/lib/lexer/EmbeddingContainer.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/EmbeddingContainer.java Wed May 28 14:48:54 2008 +0200 @@ -41,6 +41,7 @@ package org.netbeans.lib.lexer; +import org.netbeans.lib.lexer.inc.TokenHierarchyUpdate; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.LanguagePath; import org.netbeans.api.lexer.TokenHierarchyEventType; @@ -68,8 +69,8 @@ * @version 1.00 */ -public final class EmbeddingContainer { - +public final class EmbeddingContainer implements TokenOrEmbedding { + /** * Get embedded token list. * @@ -79,28 +80,20 @@ * should be obtained. * @param language whether only language embeddding of the particular language * was requested. It may be null if any embedding should be returned. + * @param initTokensInNew true if tokens should be created when a new ETL gets created. + * False in case this is called from TokenListList to grab ETLs for sections joining. 
*/ public static EmbeddedTokenList embeddedTokenList( - TokenList tokenList, int index, Language embeddedLanguage) { - EmbeddingContainer ec; - AbstractToken token; - EmbeddingPresence ep; - TokenList rootTokenList = tokenList.root(); + TokenList tokenList, int index, Language embeddedLanguage, boolean initTokensInNew) { + TokenList rootTokenList = tokenList.rootTokenList(); synchronized (rootTokenList) { - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(index); - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - // Embedding container exists - @SuppressWarnings("unchecked") - EmbeddingContainer ecUC = (EmbeddingContainer)tokenOrEmbeddingContainer; - ec = ecUC; - token = ec.token(); + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(index); + EmbeddingContainer ec = tokenOrEmbedding.embedding(); + AbstractToken token = tokenOrEmbedding.token(); + EmbeddingPresence ep; + if (ec != null) { ep = null; - } else { // No embedding was created yet - ec = null; - @SuppressWarnings("unchecked") - AbstractToken t = (AbstractToken)tokenOrEmbeddingContainer; - token = t; // Check embedding presence ep = LexerUtilsConstants.innerLanguageOperation(tokenList.languagePath()).embeddingPresence(token.id()); if (ep == EmbeddingPresence.NONE) { @@ -112,7 +105,7 @@ // need to be processed to find the embedded token list for requested language. EmbeddedTokenList prevEtl; if (ec != null) { - ec.updateStatusImpl(); + ec.updateStatusUnsync(); EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); prevEtl = null; while (etl != null) { @@ -154,8 +147,7 @@ setEmbeddingPresence(token.id(), EmbeddingPresence.ALWAYS_QUERY); } // Check whether the token contains enough text to satisfy embedding's start and end skip lengths - CharSequence text = token.text(); // Should not be null here but rather check - if (text == null || embedding.startSkipLength() + embedding.endSkipLength() > text.length()) { + if (token.isRemoved() || embedding.startSkipLength() + embedding.endSkipLength() > token.length()) { return null; } if (ec == null) { @@ -165,10 +157,19 @@ LanguagePath embeddedLanguagePath = LanguagePath.get(languagePath, embedding.language()); EmbeddedTokenList etl = new EmbeddedTokenList(ec, - embeddedLanguagePath, embedding, null); + embeddedLanguagePath, embedding); // Preceding code should ensure that (prevEtl.nextEmbeddedTokenList == null) // so no need to call etl.setNextEmbeddedTokenList(prevEtl.nextEmbeddedTokenList()) ec.addEmbeddedTokenList(prevEtl, etl, true); + + if (initTokensInNew) { + if (embedding.joinSections()) { + // Init corresponding TokenListList + rootTokenList.tokenHierarchyOperation().tokenListList(embeddedLanguagePath); + } else { // sections not joined + etl.initAllTokens(); + } + } return (embeddedLanguage == null || embeddedLanguage == embedding.language()) ? etl : null; } // Update embedding presence to NONE @@ -200,7 +201,7 @@ public static boolean createEmbedding( TokenList tokenList, int index, Language embeddedLanguage, int startSkipLength, int endSkipLength, boolean joinSections) { - TokenList rootTokenList = tokenList.root(); + TokenList rootTokenList = tokenList.rootTokenList(); // Only create embedddings for valid operations so not e.g. 
for removed token list AbstractToken token; EmbeddingContainer ec; @@ -217,12 +218,10 @@ tokenHierarchyOperation = tokenList.tokenHierarchyOperation(); tokenHierarchyOperation.ensureWriteLocked(); - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(index); - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - // Embedding container exists - @SuppressWarnings("unchecked") - EmbeddingContainer ecUC = (EmbeddingContainer)tokenOrEmbeddingContainer; - ec = ecUC; + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(index); + ec = tokenOrEmbedding.embedding(); + token = tokenOrEmbedding.token(); + if (ec != null) { EmbeddedTokenList etl2 = ec.firstEmbeddedTokenList(); while (etl2 != null) { if (embeddedLanguage == etl2.languagePath().innerLanguage()) { @@ -230,11 +229,7 @@ } etl2 = etl2.nextEmbeddedTokenList(); } - token = ec.token(); } else { - @SuppressWarnings("unchecked") - AbstractToken t = (AbstractToken)tokenOrEmbeddingContainer; - token = t; if (token.isFlyweight()) { // embedding cannot exist for this flyweight token return false; } @@ -253,12 +248,12 @@ tokenHierarchyOperation.addLanguagePath(embeddedLanguagePath); // Make the embedded token list to be the first in the list etl = new EmbeddedTokenList( - ec, embeddedLanguagePath, embedding, ec.firstEmbeddedTokenList()); + ec, embeddedLanguagePath, embedding); ec.addEmbeddedTokenList(null, etl, false); // Fire the embedding creation to the clients // Threading model may need to be changed if necessary - tokenStartOffset = ec.tokenStartOffset(); + tokenStartOffset = ec.branchTokenStartOffset(); eventInfo = new TokenHierarchyEventInfo( tokenHierarchyOperation, TokenHierarchyEventType.EMBEDDING_CREATED, @@ -269,17 +264,19 @@ // When joining sections ensure that the token list list gets created // and the embedded tokens get created because they must exist // before possible next updating of the token list. - TokenListList tll = tokenHierarchyOperation.existingTokenListList(etl.languagePath()); + TokenListList tll = tokenHierarchyOperation.existingTokenListList(etl.languagePath()); + if (!embedding.joinSections()) { + etl.initAllTokens(); + } if (tll != null) { // Update tll by embedding creation - new TokenHierarchyUpdate(eventInfo).updateCreateEmbedding(etl); + new TokenHierarchyUpdate(eventInfo).updateCreateOrRemoveEmbedding(etl, true); } else { // tll == null if (embedding.joinSections()) { // Force token list list creation only when joining sections tll = tokenHierarchyOperation.tokenListList(etl.languagePath()); } } - } // Construct outer token change info @@ -304,24 +301,21 @@ public static boolean removeEmbedding( TokenList tokenList, int index, Language embeddedLanguage) { - TokenList rootTokenList = tokenList.root(); + TokenList rootTokenList = tokenList.rootTokenList(); // Only create embedddings for valid operations so not e.g. 
for removed token list EmbeddingContainer ec; synchronized (rootTokenList) { // Check TL.isRemoved() under syncing of rootTokenList - if (tokenList.isRemoved()) // Do not create embedding for removed TLs + if (tokenList.isRemoved()) // Do not remove embedding for removed TLs return false; // If TL is not removed then THO should be non-null TokenHierarchyOperation tokenHierarchyOperation = tokenList.tokenHierarchyOperation(); tokenHierarchyOperation.ensureWriteLocked(); - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(index); - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - // Embedding container exists - @SuppressWarnings("unchecked") - EmbeddingContainer ecUC = (EmbeddingContainer)tokenOrEmbeddingContainer; - ec = ecUC; - ec.updateStatusImpl(); + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(index); + ec = tokenOrEmbedding.embedding(); + if (ec != null) { + ec.updateStatusUnsync(); EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); EmbeddedTokenList prevEtl = null; while (etl != null) { @@ -337,10 +331,10 @@ // State that the removed embedding was not default - should not matter anyway ec.addEmbeddedTokenList(null, etl, false); etl.setEmbeddingContainer(ec); - ec.invalidateChildren(); + ec.markChildrenRemovedDeep(); // Fire the embedding creation to the clients - int startOffset = ec.tokenStartOffset(); + int startOffset = ec.branchTokenStartOffset(); TokenHierarchyEventInfo eventInfo = new TokenHierarchyEventInfo( tokenHierarchyOperation, TokenHierarchyEventType.EMBEDDING_REMOVED, @@ -369,7 +363,7 @@ TokenListList tll = tokenHierarchyOperation.existingTokenListList(etl.languagePath()); if (tll != null) { // update-status already called - new TokenHierarchyUpdate(eventInfo).updateRemoveEmbedding(etl); + new TokenHierarchyUpdate(eventInfo).updateCreateOrRemoveEmbedding(etl, false); } // Fire the change @@ -383,64 +377,56 @@ return false; } - private AbstractToken token; // 12 bytes (8-super + 4) + /** + * Token wrapped by this EC. + */ + private AbstractToken branchToken; // 12 bytes (8-super + 4) + + /** + * Root token list of the hierarchy should never be null and is final. + * + */ + private final TokenList rootTokenList; // 16 bytes /** * Cached modification count allows to determine whether the start offset * needs to be recomputed. */ - private int cachedModCount; // 16 bytes + private int cachedModCount; // 20 bytes - /** - * Root token list of the hierarchy. - * - */ - private final TokenList rootTokenList; // 20 bytes - - /** - * The root embedding container to which this embedding container relates. - *
-     * It's used for getting of the start offset of the contained tokens
-     * and for getting of their text.
-     */
-    private AbstractToken rootToken; // 24 bytes
-
     /**
      * Cached start offset of the token for which this embedding container
      * was created.
+     *
+     * Its value may be shared by multiple embedded token lists.
      */
-    private int tokenStartOffset; // 28 bytes
+    private int branchTokenStartOffset; // 24 bytes

     /**
      * First embedded token list in the single-linked list.
      */
-    private EmbeddedTokenList firstEmbeddedTokenList; // 32 bytes
+    private EmbeddedTokenList firstEmbeddedTokenList; // 28 bytes

-    /**
-     * Difference between start offset of the first token in this token list
-     * against the start offset of the root token.
-     *
- * The offset gets refreshed upon updateStartOffset(). - */ - private int offsetShiftFromRootToken; // 36 bytes - /** * Embedded token list that represents the default embedding. * It may be EmbeddedTokenList.NO_DEFAULT_EMBEDDING * for failed attempt to create a default embedding. */ - private EmbeddedTokenList defaultEmbeddedTokenList; // 40 bytes + private EmbeddedTokenList defaultEmbeddedTokenList; // 32 bytes - EmbeddingContainer(AbstractToken token, TokenList rootTokenList) { - this.token = token; + EmbeddingContainer(AbstractToken branchToken, TokenList rootTokenList) { + if (branchToken == null) + throw new IllegalArgumentException("branchToken cannot be null"); + if (rootTokenList == null) + throw new IllegalArgumentException("rootTokenList cannot be null"); + this.branchToken = branchToken; this.rootTokenList = rootTokenList; - this.rootToken = token; // Has to be non-null since updateStatusImpl() would not update null rootToken // cachedModCount must differ from root's one to sync offsets // Root mod count can be >= 0 or -1 for non-incremental token lists - this.cachedModCount = -2; + this.cachedModCount = LexerUtilsConstants.MOD_COUNT_EMBEDDED_INITIAL; // Update the tokenStartOffset etc. - this assumes that the token // is already parented till the root token list. - updateStatusImpl(); + updateStatusUnsync(); } /** @@ -456,24 +442,25 @@ * @param ec non-null existing embedding container. */ EmbeddingContainer(EmbeddingContainer ec) { - this(ec.token(), ec.rootTokenList()); // Force init of tokenStartOffset and rootTokenOffsetShift - invalidate(); + this(ec.token(), ec.rootTokenList()); // Force init of tokenStartOffset + markRemoved(); } - private void invalidate() { - this.rootToken = null; - // Set cachedModCount to -2 which should not occur for regular cases - // which should force existing token sequences to be invalidated. - this.cachedModCount = -2; + private void markRemoved() { + // Set cachedModCount to LexerUtilsConstants.MOD_COUNT_REMOVED which should not occur + // for regular cases which should force existing token sequences to be invalidated. + this.cachedModCount = LexerUtilsConstants.MOD_COUNT_REMOVED; } - void invalidateChildren() { + void markChildrenRemovedDeep() { // Used by custom embedding removal EmbeddedTokenList etl = firstEmbeddedTokenList; while (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING) { for (int i = etl.tokenCountCurrent() - 1; i >= 0; i--) { - Object tokenOrEC = etl.tokenOrEmbeddingContainerUnsync(i); - if (tokenOrEC.getClass() == EmbeddingContainer.class) { - ((EmbeddingContainer)tokenOrEC).invalidateChildren(); + EmbeddingContainer ec = etl.tokenOrEmbeddingUnsync(i).embedding(); + if (ec != null) { + ec.updateStatusUnsync(); // First update the status + ec.markChildrenRemovedDeep(); + ec.markRemoved(); // Mark removed with the correctly updated offsets } } etl = etl.nextEmbeddedTokenList(); @@ -484,26 +471,12 @@ return cachedModCount; } - /** - * Check if this embedding container is up-to-date (updateStatusImpl() was called on it) - * which is useful for missing-update-status checks. - */ - public void checkStatusUpdated() { - if (cachedModCount != -2 && cachedModCount != rootTokenList.modCount() - && !checkStatusUpdatedThrowingException - ) { - // Prevent OOME because of nested throwing of exc - checkStatusUpdatedThrowingException = true; - String excMsg = "!!!INTERNAL ERROR!!! 
Status not updated on " + - this + "\nin token hierarchy\n" + rootTokenList.tokenHierarchyOperation(); - checkStatusUpdatedThrowingException = false; - throw new IllegalStateException(excMsg); - } + public final AbstractToken token() { + return branchToken; } - private static boolean checkStatusUpdatedThrowingException; - - public AbstractToken token() { - return token; + + public final EmbeddingContainer embedding() { + return this; } /** @@ -511,38 +484,18 @@ * The updateStatusImpl() should be called afterwards to update tokenStartOffset etc. */ public void reinit(AbstractToken token) { - this.token = token; - TokenList parentTokenList = token.tokenList(); - assert (parentTokenList != null); - if (parentTokenList.getClass() == EmbeddedTokenList.class) { - rootToken = ((EmbeddedTokenList)parentTokenList).rootToken(); - } else { // parent is a root token list: rootToken == token - rootToken = token; - } - updateStatusImpl(); + this.branchToken = token; + cachedModCount = LexerUtilsConstants.MOD_COUNT_EMBEDDED_INITIAL; + updateStatusUnsync(); } public TokenList rootTokenList() { return rootTokenList; } - public AbstractToken rootToken() { - return rootToken; - } - - public int tokenStartOffset() { + public int branchTokenStartOffset() { // checkStatusUpdated(); - return tokenStartOffset; - } - - public int rootTokenOffsetShift() { -// checkStatusUpdated(); - return offsetShiftFromRootToken; - } - - public char charAt(int tokenRelOffset) { -// checkStatusUpdated(); - return rootToken.charAt(offsetShiftFromRootToken + tokenRelOffset); + return branchTokenStartOffset; } public EmbeddedTokenList firstEmbeddedTokenList() { @@ -606,46 +559,92 @@ */ public boolean isRemoved() { // checkStatusUpdated(); - return (rootToken == null); + return (cachedModCount == LexerUtilsConstants.MOD_COUNT_REMOVED); } - public void updateStatusAndInvalidate() { - updateStatusImpl(); - invalidate(); - } - - public boolean updateStatus() { - synchronized (rootTokenList) { - return (updateStatusImpl() != null); - } + public void updateStatusUnsyncAndMarkRemoved() { + updateStatusUnsync(); + markRemoved(); } /** - * Update and return root token corresponding to this embedding container. + * Update status of this container in a synchronized way + * ensuring that no other thread will interfere - this is suitable + * for cases when there may be multiple concurrent readers + * using a token hierarchy and calling Token.offset() for example. + *
+ * Status updating fixes value of cached start offset of wrapped branch token + * (calling branch token(s) on upper level(s) for multiple embeddings' nesting). + * + * @return true if token is still part of token hierarchy or false + * if it was removed. */ - public AbstractToken updateStatusImpl() { - if (rootToken == null) - return null; // Removed from hierarchy - int rootModCount; - if (cachedModCount != (rootModCount = rootTokenList.modCount())) { - cachedModCount = rootModCount; - TokenList parentTokenList = token.tokenList(); - if (parentTokenList == null) { - rootToken = null; - } else if (parentTokenList.getClass() == EmbeddedTokenList.class) { - EmbeddedTokenList parentEtl = (EmbeddedTokenList)parentTokenList; - rootToken = parentEtl.embeddingContainer().updateStatusImpl(); - tokenStartOffset = parentEtl.childTokenOffsetNoUpdate(token.rawOffset()); - EmbeddingContainer parentEC = parentEtl.embeddingContainer(); - offsetShiftFromRootToken = tokenStartOffset - parentEC.tokenStartOffset() - + parentEC.rootTokenOffsetShift(); - } else { // parent is a root token list: rootToken == token - rootToken = token; - tokenStartOffset = token.offset(null); - offsetShiftFromRootToken = 0; + public boolean updateStatus() { + synchronized (rootTokenList) { + return updateStatusUnsync(); + } + } + + /** + * Unsynced synchronization of container - this method should only be used + * when there may be only a single thread accessing token hierarchy i.e. during + * token hierarchy modifications upon mutable input source modifications. + * + * @return true if token is still part of token hierarchy or false + * if it was removed. + * @see #updateStatus() + */ + public boolean updateStatusUnsync() { + return (updateStatusImpl(rootTokenList.modCount()) != LexerUtilsConstants.MOD_COUNT_REMOVED); + } + + /** + * Update the status of this embedding container when current mod count + * of a root token list is given. + * + * @param rootModCount modCount of a root token list. The token list either + * updates to it or to LexerUtilsConstants.MOD_COUNT_REMOVED if it's removed + * from a token hierarchy. If called by nested embeddings they should finally + * update to the same value. + * @return current modCount of this container. + */ + protected int updateStatusImpl(int rootModCount) { + if (cachedModCount != LexerUtilsConstants.MOD_COUNT_REMOVED && + cachedModCount != rootModCount + ) { + TokenList parentTokenList = branchToken.tokenList(); + if (parentTokenList == null) { // branch token removed from its parent token list + cachedModCount = LexerUtilsConstants.MOD_COUNT_REMOVED; + } else if (parentTokenList.getClass() == EmbeddedTokenList.class) { // deeper level embedding + EmbeddedTokenList parentEtl = (EmbeddedTokenList)parentTokenList; + cachedModCount = parentEtl.embeddingContainer().updateStatusImpl(rootModCount); + // After status of parent(s) was updated get the current branch token's offset + branchTokenStartOffset = parentEtl.tokenOffset(branchToken); + } else { // parent of branch token is a non-null root token list. + cachedModCount = rootModCount; + branchTokenStartOffset = parentTokenList.tokenOffset(branchToken); } } - return rootToken; + return cachedModCount; } + /** + * Check if this embedding container is up-to-date (updateStatusImpl() was called on it) + * which is useful for missing-update-status checks. 
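+ * <p>
+ * A minimal sketch of the mod-count caching idiom that updateStatusImpl() relies on,
+ * assuming a simplified container that caches a single start offset:
+ * <pre>
+ *   int startOffset(int rootModCount) {
+ *       if (cachedModCount != rootModCount) {       // removed containers keep MOD_COUNT_REMOVED
+ *           cachedModCount = rootModCount;          // remember the hierarchy version
+ *           branchTokenStartOffset = recompute();   // hypothetical helper; runs at most once per modification
+ *       }
+ *       return branchTokenStartOffset;
+ *   }
+ * </pre>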
+ */ + public void checkStatusUpdated() { + if (cachedModCount != LexerUtilsConstants.MOD_COUNT_REMOVED + && cachedModCount != rootTokenList.modCount() + && !checkStatusUpdatedThrowingException + ) { + // Prevent OOME because of nested throwing of exc + checkStatusUpdatedThrowingException = true; + String excMsg = "!!!INTERNAL ERROR!!! Status not updated on " + + this + "\nin token hierarchy\n" + rootTokenList.tokenHierarchyOperation(); + checkStatusUpdatedThrowingException = false; + throw new IllegalStateException(excMsg); + } + } + private static boolean checkStatusUpdatedThrowingException; + } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/JoinLexerInputOperation.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/JoinLexerInputOperation.java Wed May 28 14:48:54 2008 +0200 @@ -0,0 +1,415 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer; + +import java.util.List; +import org.netbeans.api.lexer.PartType; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.editor.util.ArrayUtilities; +import org.netbeans.spi.lexer.LexerInput; +import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.token.JoinToken; +import org.netbeans.lib.lexer.token.PartToken; +import org.netbeans.spi.lexer.TokenPropertyProvider; + +/** + * Lexer input operation over multiple joined sections (embedded token lists). + *
+ * It produces regular tokens (to be added directly into the ETL represented by
+ * {@link #activeTokenList()}) and also special {@link JoinToken} instances
+ * in case a token spans the boundaries of multiple ETLs.
+ *
+ * It can either work over a JoinTokenList directly or, during a modification,
+ * simulate that certain token lists were already removed from or added to the
+ * underlying token list.
+ *
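+ * For illustration, assume two embedded sections whose texts are "cla" and "ss".
+ * Lexed jointly they produce the single keyword "class": one JoinToken of length 5
+ * whose two PartTokens are stored in the respective ETLs:
+ * <pre>
+ *   ETL[0]: ..., PartToken "cla" (PartType.START)
+ *   ETL[1]: PartToken "ss" (PartType.END), ...
+ * </pre>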
+ * + * {@link #recognizedTokenLastInTokenList()} gives information whether the lastly + * produced token ends right at boundary of the activeTokenList. + * + * @author Miloslav Metelka + * @version 1.00 + */ + +public class JoinLexerInputOperation extends LexerInputOperation { + + CharSequence inputSourceText; + + private TokenListText readText; // For servicing read() + + private TokenListText readExistingText; + + /** + * Token list in which the last recognized token started. + */ + private EmbeddedTokenList activeTokenList; + + /** + * Index of activeTokenList in JTL. + */ + private int activeTokenListIndex; + + /** + * End offset of the active token list. + */ + private int activeTokenListEndOffset; + + /** + * Real token's start offset used to derive the token's offset in ETL. + * Since tokenStartOffset is affected by TokenListList.readOffsetShift + * it cannot be used for this purpose. + */ + private int realTokenStartOffset; + + private boolean recognizedTokenJoined; // Whether recognized token will consist of parts + + + public JoinLexerInputOperation(JoinTokenList joinTokenList, int relexJoinIndex, Object lexerRestartState, + int activeTokenListIndex, int relexOffset + ) { + super(joinTokenList, relexJoinIndex, lexerRestartState); + this.inputSourceText = joinTokenList.inputSourceText(); + this.activeTokenListIndex = activeTokenListIndex; + tokenStartOffset = relexOffset; + readOffset = relexOffset; + // Assign realTokenStartOffset after fetchActiveTokenList() since it would overwrite it + realTokenStartOffset = relexOffset; + } + + public final void init() { + // Following code uses tokenList() method overriden in MutableJoinLexerInputOperation + // so the following code would fail when placed in constructor since the constructor of MJLIO would not yet run. + fetchActiveTokenList(); + readText = new TokenListText(); + readText.init(); + } + + /** + * Get active ETL into which the last produced token should be added. + * For join tokens there is an ETL into which a last part of JT should be added. + */ + public EmbeddedTokenList activeTokenList() { + return activeTokenList; + } + + /** + * Get index of active ETL into which the last produced token should be added. + * For join tokens there is an index of the last ETL into which a last part of JT should be added. + */ + public int activeTokenListIndex() { + return activeTokenListIndex; + } + + /** + * True if the last returned token is last in {@link #activeTokenList()}. + * For join tokens this applies to the last part of join token. 
+ */ + public boolean recognizedTokenLastInTokenList() { + // realTokenStartOffset is set to the end of last recognized token + return (realTokenStartOffset == activeTokenListEndOffset); + } + + @Override + public int lastTokenEndOffset() { + return realTokenStartOffset; + } + + public int read(int offset) { // index >= 0 is guaranteed by contract + return readText.read(offset); + } + + public char readExisting(int offset) { + if (readText.isInBounds(offset)) { + return readText.inBoundsChar(offset); + } + if (readExistingText == null) { + readExistingText = new TokenListText(); + readExistingText.initFrom(readText); + } + return readExistingText.existingChar(offset); + } + + @Override + public void assignTokenLength(int tokenLength) { + super.assignTokenLength(tokenLength); + // Check whether activeTokenList needs to be changed due to various flags + if (recognizedTokenLastInTokenList()) { // Advance to next token list + // Since this is done when recognizing a next token it should be ok when recognizing + // last token in the last ETL (it should not go beyond last ETL). + do { + activeTokenListIndex++; + fetchActiveTokenList(); + } while (realTokenStartOffset == activeTokenListEndOffset); // Skip empty ETLs + } + // Advance to end of currently recognized token + realTokenStartOffset += tokenLength; + // Joined token past ETL's boundary + recognizedTokenJoined = (realTokenStartOffset > activeTokenListEndOffset); + } + + private void fetchActiveTokenList() { + activeTokenList = tokenList(activeTokenListIndex); + realTokenStartOffset = activeTokenList.startOffset(); + activeTokenListEndOffset = activeTokenList.endOffset(); + } + + public EmbeddedTokenList tokenList(int tokenListIndex) { // Also used by JoinTokenListChange + return ((JoinTokenList) tokenList).tokenList(tokenListIndex); + } + + protected int tokenListCount() { + return ((JoinTokenList) tokenList).tokenListCount(); + } + + protected void fillTokenData(AbstractToken token) { + if (!recognizedTokenJoined) { + token.setTokenList(activeTokenList); + // Subtract tokenLength since this is already advanced to end of token + token.setRawOffset(realTokenStartOffset - tokenLength); + } + } + + @Override + protected boolean isFlyTokenAllowed() { + return super.isFlyTokenAllowed() && !recognizedTokenJoined; + } + + @Override + protected AbstractToken createDefaultTokenInstance(T id) { + if (recognizedTokenJoined) { + return createJoinToken(id, null, PartType.COMPLETE); + } else { // Regular case + return super.createDefaultTokenInstance(id); + } + } + + @Override + protected AbstractToken createPropertyTokenInstance(T id, + TokenPropertyProvider propertyProvider, PartType partType) { + if (recognizedTokenJoined) { + return createJoinToken(id, null, partType); + } else { // Regular case + return super.createPropertyTokenInstance(id, propertyProvider, partType); + } + } + + private AbstractToken createJoinToken(T id, + TokenPropertyProvider propertyProvider, PartType partType) { + // Create join token + // realTokenStartOffset is already advanced by tokenLength so first decrease it + realTokenStartOffset -= tokenLength; + JoinToken joinToken = new JoinToken(id, tokenLength, propertyProvider, partType); + int joinPartCountEstimate = readText.tokenListIndex - activeTokenListIndex + 1; + @SuppressWarnings("unchecked") + PartToken[] parts = new PartToken[joinPartCountEstimate]; + int partLength = activeTokenListEndOffset - realTokenStartOffset; + PartToken partToken = new PartToken(id, partLength, propertyProvider, PartType.START, joinToken, 0, 
0); + partToken.setTokenList(activeTokenList); + partToken.setRawOffset(realTokenStartOffset); // realTokenStartOffset already decreased by tokenLength + parts[0] = partToken; + int partIndex = 1; + int partTextOffset = partLength; // Length of created parts so far + int firstPartTokenListIndex = activeTokenListIndex; + do { + activeTokenListIndex++; + fetchActiveTokenList(); + // realTokenStartOffset set to start activeTokenList + PartType partPartType; + // Attempt total ETL's length as partLength + partLength = activeTokenListEndOffset - realTokenStartOffset; + if (partLength == 0) { + continue; + } + if (partTextOffset + partLength >= tokenLength) { // Last part + partLength = tokenLength - partTextOffset; + // If the partType of the join token is not complete then this will be PartType.MIDDLE + partPartType = (partType == PartType.START) ? PartType.MIDDLE : PartType.END; + } else { // Non-last part + partPartType = PartType.MIDDLE; + } + + partToken = new PartToken(id, partLength, propertyProvider, partPartType, joinToken, partIndex, partTextOffset); + // realTokenStartOffset still points to start of activeTokenList + partToken.setRawOffset(realTokenStartOffset); // ETL.startOffset() will be subtracted upon addition to ETL + partToken.setTokenList(activeTokenList); + partTextOffset += partLength; + parts[partIndex++] = partToken; + } while (partTextOffset < tokenLength); + // Update realTokenStartOffset which pointed to start of activeTokenList + realTokenStartOffset += partLength; + // Check that the array does not have any extra items + if (partIndex < parts.length) { + @SuppressWarnings("unchecked") + PartToken[] tmp = new PartToken[partIndex]; + System.arraycopy(parts, 0, tmp, 0, partIndex); + parts = tmp; + } + List> partList = ArrayUtilities.unmodifiableList(parts); + joinToken.setJoinedParts(partList, activeTokenListIndex - firstPartTokenListIndex); + // joinToken.setTokenList() makes no sense - JoinTokenList instances are temporary + // joinToken.setRawOffset() makes no sense - offset taken from initial part + return joinToken; + } + + /** + * Class for reading of text of subsequent ETLs - it allows to see their text + * as a consecutive character sequence (inputSourceText is used as a backing char sequence) + * with an increasing readIndex (it's not decremented after token's recognition). + */ + final class TokenListText { + + int tokenListIndex; + + int tokenListStartOffset; + + int tokenListEndOffset; + + /** + * A constant added to readOffset to allow a smoothly increasing reading offset + * when reading through multiple ETLs with gaps among them. + */ + int readOffsetShift; + + void init() { + EmbeddedTokenList etl = tokenList(activeTokenListIndex); + tokenListStartOffset = etl.startOffset(); + tokenListEndOffset = etl.endOffset(); + // No extra shift for first token + } + + void initFrom(TokenListText text) { + this.tokenListIndex = text.tokenListIndex; + this.tokenListStartOffset = text.tokenListStartOffset; + this.tokenListEndOffset = text.tokenListEndOffset; + this.readOffsetShift = text.readOffsetShift; + } + + /** + * Read next char or return EOF. 
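+ * <p>
+ * A minimal sketch of the gap skipping, assuming two ETLs covering &lt;0,2)
+ * and &lt;4,6) of inputSourceText "AB..CD" where ".." is covered by no ETL:
+ * <pre>
+ *   logical offset:  0    1    2    3
+ *   shift applied:   0    0    2    2     // readOffsetShift grows by the gap size 4-2=2
+ *   char read:      'A'  'B'  'C'  'D'    // inputSourceText.charAt(offset + shift)
+ * </pre>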
+ */ + int read(int offset) { + offset += readOffsetShift; + if (offset < tokenListEndOffset) { + return inputSourceText.charAt(offset); + } else { + while (++tokenListIndex < tokenListCount()) { + EmbeddedTokenList etl = tokenList(tokenListIndex); + tokenListStartOffset = etl.startOffset(); + // Increase offset shift by the size of gap between ETLs + readOffsetShift += tokenListStartOffset - tokenListEndOffset; + // Also shift given offset value + offset += tokenListStartOffset - tokenListEndOffset; + tokenListEndOffset = etl.endOffset(); + if (readOffset < tokenListEndOffset) { // ETL might be empty + return inputSourceText.charAt(offset); + } + } + tokenListIndex--; // Return to (tokenListCount() - 1) + return LexerInput.EOF; + } + } + + /** + * Check whether currently set text covers the given relative index. + * + * @param index index in the same metrics as readIndex. + * @return whether the given index is within current bounds. + */ + boolean isInBounds(int offset) { + offset += readOffsetShift; + return offset >= tokenListStartOffset && offset < tokenListEndOffset; + } + + /** + * Get char that was previously verified to be within bounds. + */ + char inBoundsChar(int offset) { + offset += readOffsetShift; + return inputSourceText.charAt(offset); + } + + char existingChar(int offset) { + offset += readOffsetShift; + if (offset < tokenListStartOffset) { + while (true) { // Char should exist + tokenListIndex--; + EmbeddedTokenList etl = tokenList(tokenListIndex); + tokenListEndOffset = etl.endOffset(); + // Decrease offset shift by the size of gap between ETLs + readOffsetShift -= tokenListStartOffset - tokenListEndOffset; + // Also shift given offset value + offset -= tokenListStartOffset - tokenListEndOffset; + tokenListStartOffset = etl.startOffset(); + if (readOffset >= tokenListStartOffset) { // ETL might be empty + return inputSourceText.charAt(offset); + } + } + + } else if (offset >= tokenListEndOffset) { + while (true) { // Char should exist + tokenListIndex++; + EmbeddedTokenList etl = tokenList(tokenListIndex); + tokenListStartOffset = etl.startOffset(); + // Increase offset shift by the size of gap between ETLs + readOffsetShift += tokenListStartOffset - tokenListEndOffset; + // Also shift given offset value + offset += tokenListStartOffset - tokenListEndOffset; + tokenListEndOffset = etl.endOffset(); + if (readOffset < tokenListEndOffset) { // ETL might be empty + return inputSourceText.charAt(offset); + } + } + + } + // Index within current bounds + return inputSourceText.charAt(offset); + } + + } + + @Override + public String toString() { + return super.toString() + ", realTokenStartOffset=" + realTokenStartOffset + // NOI18N + ", activeTokenListIndex=" + activeTokenListIndex + // NOI18N + ", activeTokenListEndOffset=" + activeTokenListEndOffset; // NOI18N + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/JoinTokenList.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/JoinTokenList.java Wed May 28 14:48:54 2008 +0200 @@ -0,0 +1,661 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. 
You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer; + +import java.util.List; +import java.util.Set; +import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.InputAttributes; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.editor.util.ArrayUtilities; +import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.token.JoinToken; +import org.netbeans.lib.lexer.token.PartToken; + + +/** + * Join token list over certain range of ETLs of a TokenListList. + *
+ * There must always be at least one ETL in a JTL since otherwise there would be
+ * nothing holding the EmbeddedJoinInfo that holds the JoinTokenListBase, which is
+ * crucial for the JTL.
+ *
+ * It does not have any physical storage for tokens. Regular tokens
+ * are stored in individual ETLs. Tokens split across multiple ETLs
+ * are represented as a PartToken in each ETL, referencing a JoinToken.
+ * The only "countable" part is the last part of a JoinToken.
+ *
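+ * For illustration, a join token spanning three ETLs is stored as follows
+ * (hypothetical tokens t0-t2 and join token parts p0-p2):
+ * <pre>
+ *   ETL[0]: t0, t1, p0 (PartType.START)   // part is last in its ETL; not counted
+ *   ETL[1]: p1 (PartType.MIDDLE)          // sole token of its ETL; not counted
+ *   ETL[2]: p2 (PartType.END), t2         // last part; counted as the JoinToken
+ * </pre>
+ *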
+ * Lookaheads and states are assigned to a last part of the JoinToken + * and it's stored normally in ETL like for regular tokens. + * + * @author Miloslav Metelka + */ + +public class JoinTokenList implements TokenList { + + /** + * Create join token list over an uninitialized set of embedded token lists + * and this method will perform initial lexing of the contained embedded token lists. + * + * @param tokenListList non-null tokenListList + * @param tokenListStartIndex index of first ETL contained in the desired JTL. + * @param tokenListCount total number of ETLs contained in the created JTL. + * @return non-null JTL. + */ + public static JoinTokenList init( + TokenListList tokenListList, int tokenListStartIndex, int tokenListCount + ) { + assert (tokenListCount > 0) : "tokenListCount must be >0"; + JoinTokenListBase base = new JoinTokenListBase(tokenListCount, tokenListList.rootTokenList().modCount()); + // Create join token list - just init first ETL's join info (read by JTL's constructor) + JoinTokenList jtl = new JoinTokenList(tokenListList, base, tokenListStartIndex); + // Notify will initialize all ETL.joinInfo except the first one (already inited) + JoinLexerInputOperation lexerInputOperation = new JoinLexerInputOperation(jtl, 0, null, 0, + tokenListList.get(tokenListStartIndex).startOffset()); + lexerInputOperation.init(); + AbstractToken token; + int joinTokenCount = 0; + int activeTokenListIndex = -1; + EmbeddedTokenList activeTokenList = null; + while ((token = lexerInputOperation.nextToken()) != null) { + if (activeTokenList != lexerInputOperation.activeTokenList()) { + do { + // There may be empty ETLs and ETLs that will contain part tokens + activeTokenListIndex++; + activeTokenList = jtl.tokenList(activeTokenListIndex); + activeTokenList.joinInfo = new EmbeddedJoinInfo(base, joinTokenCount, activeTokenListIndex); + } while (activeTokenListIndex < lexerInputOperation.activeTokenListIndex()); + } + if (token.getClass() == JoinToken.class) { + // ETL for last part + JoinToken joinToken = (JoinToken) token; + List> joinedParts = joinToken.joinedParts(); + // Index for active list of addition + // There may be ETLs so token list count may differ from part count + int extraTokenListSpanCount = joinToken.extraTokenListSpanCount(); + int tokenListIndex = activeTokenListIndex - extraTokenListSpanCount; + int joinedPartIndex = 0; + // Only add without the last part (will be added normally outside the loop) + // The last ETL can not be empty (must contain the last non-empty token part) + for (int i = 0; i < extraTokenListSpanCount; i++) { + EmbeddedTokenList etl = tokenListList.get(tokenListStartIndex + tokenListIndex + i); + // Check whether token list is non-empty by checking a text length that it covers. + // Do not use elt.tokenCount() since the tokens are just being added into ETL. + if (etl.textLength() > 0) { + etl.addToken(joinedParts.get(joinedPartIndex++), 0, null); + } + etl.joinInfo.setJoinTokenLastPartShift(extraTokenListSpanCount - i); + // Increase total number of tokens - use just the first part's ETL since others are ignored. 
+ // There's a single token part in ETL only + } + // Last part will be added normally by subsequent code + token = joinedParts.get(joinedPartIndex); // Should be (joinedParts.size()-1) + } + activeTokenList.addToken(token, lexerInputOperation); + joinTokenCount++; + } + // Init possible empty ETL at the end + while (activeTokenListIndex < lexerInputOperation.activeTokenListIndex()) { + EmbeddedTokenList etl = jtl.tokenList(activeTokenListIndex); + assert (etl.joinInfo == null); + etl.joinInfo = new EmbeddedJoinInfo(base, joinTokenCount, activeTokenListIndex); + activeTokenListIndex++; + } + // Trim storage of all ETLs to their current size + for (int i = tokenListCount - 1; i >= 0; i--) { + tokenListList.get(tokenListStartIndex + i).trimStorageToSize(); + } + base.joinTokenCount = joinTokenCount; + // Could possibly subtract gap lengths but should not be necessary + return jtl; + } + + /** Backing token list list that holds ETLs. */ + protected final TokenListList tokenListList; // 16 bytes + + /** Info about token list count and join token index gap. */ + protected final JoinTokenListBase base; // 12 bytes (8-super + 4) + + /** Start index of ETLs in TLL used by this JTL. */ + protected final int tokenListStartIndex; // 20 bytes + + /** Index of active token list. */ + protected int activeTokenListIndex; // 24 bytes + + /** Token list currently servicing requests. */ + protected EmbeddedTokenList activeTokenList; // 28 bytes + + /** Start join index of activeTokenList */ + protected int activeStartJoinIndex; // 32 bytes + + /** End join index of activeTokenList */ + protected int activeEndJoinIndex; // 36 bytes + + public JoinTokenList(TokenListList tokenListList, JoinTokenListBase base, int tokenListStartIndex) { + this.tokenListList = tokenListList; + this.base = base; + this.tokenListStartIndex = tokenListStartIndex; + this.activeTokenListIndex = -1; // Signal invalid value + } + + public LanguagePath languagePath() { + return tokenListList.languagePath(); + } + + public TokenListList tokenListList() { + return tokenListList; + } + + public JoinTokenListBase base() { + return base; + } + + public int tokenListStartIndex() { + return tokenListStartIndex; + } + + /** + * Get token list contained in this join token list. + * + * @param index >=0 index of the token list in this joined token list. + * @return non-null embedded token list at the given index. 
+ */ + public EmbeddedTokenList tokenList(int index) { + if (index < 0) + throw new IndexOutOfBoundsException("index=" + index + " < 0"); // NOI18N + if (index >= base.tokenListCount) + throw new IndexOutOfBoundsException("index=" + index + " >= size()=" + base.tokenListCount); // NOI18N + return tokenListList.get(tokenListStartIndex + index); + } + + public int tokenListCount() { + return base.tokenListCount; + } + + + public int tokenCountCurrent() { + return base.joinTokenCount; + } + + public int tokenCount() { + return tokenCountCurrent(); + } + + public int activeStartJoinIndex() { // Use by TS.embeddedImpl() + return activeStartJoinIndex; + } + + public int activeEndJoinIndex() { // Use by TokenListUpdater + return activeEndJoinIndex; + } + + public int activeTokenListIndex() { + return activeTokenListIndex; + } + + public void setActiveTokenListIndex(int activeTokenListIndex) { // Used by ETL.joinTokenList() + if (this.activeTokenListIndex != activeTokenListIndex) { + this.activeTokenListIndex = activeTokenListIndex; + fetchActiveTokenListData(); + } + } + + public EmbeddedTokenList activeTokenList() { + return activeTokenList; + } + + public TokenOrEmbedding tokenOrEmbedding(int index) { + locateTokenListByIndex(index); + TokenOrEmbedding tokenOrEmbedding = activeTokenList.tokenOrEmbedding(index - activeStartJoinIndex); + // Need to return complete token in case a token part was retrieved + AbstractToken token; + if (index == activeStartJoinIndex && // token part can only be the first in ETL + tokenOrEmbedding != null && // could be beyond end? + (token = tokenOrEmbedding.token()).getClass() == PartToken.class + ) { + tokenOrEmbedding = ((PartToken)token).joinTokenOrEmbedding(); + } + return tokenOrEmbedding; + } + + public int tokenOffset(AbstractToken token) { + // Should never be called for any token instances + throw new IllegalStateException("Internal error - should never be called"); + } + + public int tokenOffsetByIndex(int index) { + locateTokenListByIndex(index); + // Need to treat specially token parts - return offset of complete token + AbstractToken token; + if (index == activeStartJoinIndex && // token part can only be the first in ETL + (token = activeTokenList.tokenOrEmbedding(index - activeStartJoinIndex).token()).getClass() == PartToken.class + ) { + return ((PartToken)token).joinToken().offset(null); + } + return activeTokenList.tokenOffsetByIndex(index - activeStartJoinIndex); + } + + public int tokenListIndex(int offset, int startIndex, int endIndex) { + // First find the right ETL for the given offset and store it in activeTokenListIndex + // Use binary search + int low = startIndex; + int high = endIndex - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + int midStartOffset = tokenList(mid).startOffset(); + + if (midStartOffset < offset) { + low = mid + 1; + } else if (midStartOffset > offset) { + high = mid - 1; + } else { + // Token starting exactly at ETL.startOffset() + high = mid; + break; + } + } + // Use lower index => high + return high; // May return -1 + } + + public int[] tokenIndex(int offset) { + // Check if the current active token list covers the given offset. + // If not covered then only search below/above the current active ETL. + // It not only improves performance but it is NECESSARY for proper functionality + // of TokenListUpdater.updateJoined() since it may skip removed ETLs + // by manually using setActiveTokenListIndex() in the area below/above the removed ETLs. 
+ boolean activeStartsBelowOffset = ((offset >= activeTokenList.startOffset()) || activeTokenListIndex == 0); + if (activeStartsBelowOffset) { + if (offset < activeTokenList.endOffset() || + (activeTokenListIndex + 1 == tokenListCount() || + offset < tokenList(activeTokenListIndex + 1).startOffset()) + ) { + // Current active ETL covers the area + } else if (activeTokenListIndex + 1 < tokenListCount()) { // Search above + activeTokenListIndex = tokenListIndex(offset, activeTokenListIndex + 1, tokenListCount()); + fetchActiveTokenListData(); + } + } else if (activeTokenListIndex > 0) { // Search below + activeTokenListIndex = tokenListIndex(offset, 0, activeTokenListIndex); + if (activeTokenListIndex < 0) { + activeTokenListIndex = 0; + } + fetchActiveTokenListData(); + } + + // Now search within a single ETL by binary search + EmbeddedJoinInfo joinInfo = activeTokenList.joinInfo; + int joinTokenLastPartShift = joinInfo.joinTokenLastPartShift(); + int searchETLTokenCount = activeTokenList.joinTokenCount(); + int[] indexAndTokenOffset = LexerUtilsConstants.tokenIndexBinSearch(activeTokenList, offset, searchETLTokenCount); + int etlIndex = indexAndTokenOffset[0]; // Index inside etl + indexAndTokenOffset[0] += joinInfo.joinTokenIndex(); // Make the index joinIndex + if (etlIndex == searchETLTokenCount && joinTokenLastPartShift > 0) { // Must move activeTokenList to last part + // Get last part and find out how much forward is the last part + activeTokenListIndex += joinTokenLastPartShift; + fetchActiveTokenListData(); + PartToken lastPartToken = (PartToken) activeTokenList.tokenOrEmbeddingUnsync(0).token(); + indexAndTokenOffset[1] = lastPartToken.joinToken().offset(null); + + } else if (etlIndex == 0) { // Possibly last part of a join token + AbstractToken token = activeTokenList.tokenOrEmbedding(0).token(); + if (token.getClass() == PartToken.class) { + // indexAndTokenOffset[0] is already ok - just fix token's offset + indexAndTokenOffset[1] = ((PartToken)token).joinToken().offset(null); + } + } + return indexAndTokenOffset; + } + + public AbstractToken replaceFlyToken(int index, AbstractToken flyToken, int offset) { + locateTokenListByIndex(index); + return activeTokenList.replaceFlyToken(index - activeStartJoinIndex, flyToken, offset); + } + + public void wrapToken(int index, EmbeddingContainer embeddingContainer) { + locateTokenListByIndex(index); + // !!! TBD - must not wrap complete tokens of join token list. 
+ // Instead wrap all part tokens with another join token list + activeTokenList.wrapToken(index - activeStartJoinIndex, embeddingContainer); + } + + public final int modCount() { + return rootTokenList().modCount(); + } + + public InputAttributes inputAttributes() { + return rootTokenList().inputAttributes(); + } + + public int lookahead(int index) { + // Locate embedded token list for the last token part (only that one stores the LA) + locateTokenListByIndex(index); + return activeTokenList.lookahead(index - activeStartJoinIndex); + } + + public Object state(int index) { + // Locate embedded token list for the last token part (only that one stores the state) + locateTokenListByIndex(index); + return activeTokenList.state(index - activeStartJoinIndex); + } + + public final TokenList rootTokenList() { + return tokenListList.rootTokenList(); + } + + public CharSequence inputSourceText() { + return rootTokenList().inputSourceText(); + } + + public TokenHierarchyOperation tokenHierarchyOperation() { + return rootTokenList().tokenHierarchyOperation(); + } + + public boolean isContinuous() { + return false; // TBD can be partially continuous - could be improved + } + + public Set skipTokenIds() { + return null; // Not a top-level list -> no skip token ids + } + + public int startOffset() { + return tokenList(0).startOffset(); + } + + public int endOffset() { + return tokenList(base.tokenListCount - 1).endOffset(); + } + + public void updateStatus() { + synchronized (rootTokenList()) { + updateStatusUnsync(); + } + } + + /** + * Unsynced synchronization of join token list - this method should only be used + * when there may be only a single thread accessing token hierarchy i.e. during + * token hierarchy modifications upon mutable input source modifications. + * + * @see #updateStatus() + */ + public void updateStatusUnsync() { + int rootModCount; + if (base.lastModCount != (rootModCount = rootTokenList().modCount())) { + base.lastModCount = rootModCount; + // Update status of all the contained ETLs + for (int i = 0; i < base.tokenListCount; i++) { + tokenList(i).embeddingContainer().updateStatus(); + } + } + } + + public boolean isRemoved() { + return false; // Should never be parented + } + + /** + * Get index of token list where a token for a particular joinInde starts + * and index where it's located. + * + * @param index index in JTL. + * @return [0] contains token-list-index and [1] index of token-start info. + */ + public int tokenStartLocalIndex(int index) { + if (index == tokenCount()) { + if (activeTokenListIndex < tokenListCount() - 1) { + activeTokenListIndex = tokenListCount() - 1; + fetchActiveTokenListData(); + } + return activeTokenList.tokenCountCurrent(); // Index at end (can't be join tokens) + } + + locateTokenListByIndex(index); + AbstractToken token = activeTokenList.tokenOrEmbeddingUnsync(index - activeStartJoinIndex).token(); + if (token.getClass() == PartToken.class) { // Last part of join token + PartToken partToken = (PartToken) token; + activeTokenListIndex -= partToken.joinToken().extraTokenListSpanCount(); + fetchActiveTokenListData(); + // The first part of join token is last in the active ETL + return activeTokenList.tokenCountCurrent() - 1; + } + return index - activeStartJoinIndex; + } + + /** + * Locate the right activeTokenList to service the requested join index. + * + * @param joinIndex index in a join token list. + * @throws IndexOutOfBoundsException for joinIndex below zero. 
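+ * <p>
+ * A sketch of the strategy, mirroring the method body below: sequential iteration
+ * is the typical access pattern, so the immediately neighboring token list is tried
+ * first and binary search via positionToJoinIndex() is only a fallback:
+ * <pre>
+ *   if (joinIndex &lt; activeStartJoinIndex) {
+ *       // try the previous ETL; if still below, binary search in &lt;0, active-1&gt;
+ *   } else if (joinIndex &gt;= activeEndJoinIndex) {
+ *       // try the next ETL; if still above, binary search in &lt;active+1, count-1&gt;
+ *   } // else joinIndex is already within activeTokenList
+ * </pre>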
+ */ + protected final void locateTokenListByIndex(int joinIndex) { + if (joinIndex < activeStartJoinIndex) { + if (joinIndex < 0) + throw new IndexOutOfBoundsException("index=" + joinIndex + " < 0"); + // Must be lower segment - first try the one below + activeTokenListIndex--; + fetchActiveTokenListData(); + if (joinIndex < activeStartJoinIndex) { // Still not covered + // Do binary search on <0, activeTokenListIndex - 1> + positionToJoinIndex(joinIndex, 0, activeTokenListIndex - 1); + } + + } else if (joinIndex >= activeEndJoinIndex) { + if (activeTokenListIndex + 1 < tokenListCount()) { + activeTokenListIndex++; + fetchActiveTokenListData(); + if (joinIndex >= activeEndJoinIndex) { // Still too high + // Do binary search on + positionToJoinIndex(joinIndex, activeTokenListIndex + 1, base.tokenListCount - 1); + } + } + } + // The index is within bounds of activeTokenList + } + + private void positionToJoinIndex(int joinIndex, int low, int high) { + while (low <= high) { + activeTokenListIndex = (low + high) >>> 1; + fetchActiveTokenListData(); + if (activeStartJoinIndex < joinIndex) { + low = activeTokenListIndex + 1; + } else if (activeStartJoinIndex > joinIndex) { + high = activeTokenListIndex - 1; + } else { // first token of the active token list + return; + } + } + // low == high + 1 + if (activeTokenListIndex != high) { + activeTokenListIndex = high; + fetchActiveTokenListData(); + } + } + + protected final void fetchActiveTokenListData() { + activeTokenList = tokenList(activeTokenListIndex); + activeStartJoinIndex = activeTokenList.joinInfo.joinTokenIndex(); + activeEndJoinIndex = activeStartJoinIndex + activeTokenList.joinTokenCount(); + } + + public String checkConsistency() { + // Check regular consistency without checking embeddings + String error = LexerUtilsConstants.checkConsistencyTokenList(this, false); + if (error == null) { + // Check individual ETLs and their join infos + int joinTokenCount = 0; + JoinToken activeJoinToken = null; + int joinedPartCount = 0; + int nextCheckPartIndex = 0; + for (int tokenListIndex = 0; tokenListIndex < tokenListCount(); tokenListIndex++) { + EmbeddedTokenList etl = tokenList(tokenListIndex); + error = LexerUtilsConstants.checkConsistencyTokenList(etl, false); + if (error != null) + return error; + + if (etl.joinInfo == null) { + return "Null joinInfo for ETL at token-list-index " + tokenListIndex; // NOI18N + } + if (joinTokenCount != etl.joinInfo.joinTokenIndex()) { + return "joinTokenIndex=" + joinTokenCount + " != etl.joinInfo.joinTokenIndex()=" + // NOI18N + etl.joinInfo.joinTokenIndex() + " at token-list-index " + tokenListIndex; // NOI18N + } + if (tokenListIndex != etl.joinInfo.tokenListIndex()) { + return "token-list-index=" + tokenListIndex + " != etl.joinInfo.tokenListIndex()=" + // NOI18N + etl.joinInfo.tokenListIndex(); + } + + int etlTokenCount = etl.tokenCount(); + int etlJoinTokenCount = etlTokenCount; + if (etlTokenCount > 0) { + AbstractToken token = etl.tokenOrEmbeddingUnsync(0).token(); + int startCheckIndex = 0; + // Check first token (may also be the last token) + if (activeJoinToken != null) { + if (token.getClass() != PartToken.class) { + return "Unfinished joinToken at token-list-index=" + tokenListIndex; // NOI18N + } + error = checkConsistencyJoinToken(activeJoinToken, token, nextCheckPartIndex++, tokenListIndex); + if (error != null) { + return error; + } + if (nextCheckPartIndex == joinedPartCount) { + activeJoinToken = null; // activeJoinToken ended + } else { // For non-last there must be no other tokens in 
the list + if (etlTokenCount > 1) { + return "More than one token and non-last part of unfinished join token" + // NOI18N + " at token-list-index " + tokenListIndex; // NOI18N + } + // etlTokenCount so the first token is last too + // and this is an ETL with single token part that continues activeJoinToken + etlJoinTokenCount--; + } + startCheckIndex = 1; + } + // Check last token + if (etlTokenCount > startCheckIndex) { + assert (activeJoinToken == null); + token = etl.tokenOrEmbeddingUnsync(etlTokenCount - 1).token(); + if (token.getClass() == PartToken.class) { + etlJoinTokenCount--; + activeJoinToken = ((PartToken) token).joinToken(); + joinedPartCount = activeJoinToken.joinedParts().size(); + nextCheckPartIndex = 0; + if (joinedPartCount < 2) { + return "joinedPartCount=" + joinedPartCount + " < 2"; + } + error = checkConsistencyJoinToken(activeJoinToken, token, nextCheckPartIndex++, tokenListIndex); + if (error != null) + return error; + } + } + // Check that no other token are part tokens than the relevant ones + for (int j = startCheckIndex; j < etlJoinTokenCount; j++) { + if (etl.tokenOrEmbeddingUnsync(j).token().getClass() == PartToken.class) { + return "Inside PartToken at index " + j + "; joinTokenCount=" + etlJoinTokenCount; // NOI18N + } + } + } + if (etlJoinTokenCount != etl.joinTokenCount()) { + return "joinTokenCount=" + etlJoinTokenCount + " != etl.joinTokenCount()=" + // NOI18N + etl.joinTokenCount() + " at token-list-index " + tokenListIndex; // NOI18N + } + joinTokenCount += etlJoinTokenCount; + } // end-of-for over ETLs + if (activeJoinToken != null) { + return "Unfinished join token at end"; + } + if (joinTokenCount != base.joinTokenCount) { + return "joinTokenCount=" + joinTokenCount + " != base.joinTokenCount=" + base.joinTokenCount; // NOI18N + } + } + // Check placement of index gap + return error; + } + + private String checkConsistencyJoinToken(JoinToken joinToken, AbstractToken token, int partIndex, int tokenListIndex) { + PartToken partToken = (PartToken) token; + if (joinToken.joinedParts().get(partIndex) != token) { + return "activeJoinToken.joinedParts().get(" + partIndex + // NOI18N + ") != token at token-list-index" + tokenListIndex; // NOI18N + } + if (partToken.joinToken() != joinToken) { + return "Invalid join token of part at partIndex " + partIndex + // NOI18N + " at token-list-index " + tokenListIndex; // NOI18N + } + EmbeddedTokenList etl = tokenList(tokenListIndex); + int lps = etl.joinInfo.joinTokenLastPartShift(); + if (lps < 0) { + return "lps=" + lps + " < 0"; + } + + if (tokenListIndex + lps >= tokenListCount()) { + return "Invalid lps=" + lps + // NOI18N + " at token-list-index " + tokenListIndex + // NOI18N + "; tokenListCount=" + tokenListCount(); // NOI18N + } + AbstractToken lastPart = tokenList(tokenListIndex + lps).tokenOrEmbeddingUnsync(0).token(); + if (lastPart.getClass() != PartToken.class) { + return "Invalid lps: lastPart not PartToken " + lastPart.dumpInfo(null, null, true, 0) + // NOI18N + " at token-list-index " + tokenListIndex; // NOI18N + } + if (((PartToken)lastPart).joinToken().lastPart() != lastPart) { + return "Invalid lps: Not last part " + lastPart.dumpInfo(null, null, true, 0) + // NOI18N + " at token-list-index " + tokenListIndex; // NOI18N + } + return null; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(256); + int tokenListCount = tokenListCount(); + int digitCount = String.valueOf(tokenListCount - 1).length(); + for (int i = 0; i < tokenListCount; i++) { + 
ArrayUtilities.appendBracketedIndex(sb, i, digitCount); + tokenList(i).dumpInfo(sb); // includes '\n' + } + return LexerUtilsConstants.appendTokenList(sb, this).toString(); + } + +} + diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/JoinTokenListBase.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/JoinTokenListBase.java Wed May 28 14:48:54 2008 +0200 @@ -0,0 +1,174 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer; + +/** + * Structure holding a token list count that bases a JoinTokenList. + *
+ * There is also information about a join token index gap inside the series
+ * of contained embedded token lists.
+ *
+ * A join token list can be created as a wrapper around this class, given the token list list
+ * and the token list index at which the join token list starts.
+ *
+ * @author Miloslav Metelka
+ */
+
+public final class JoinTokenListBase {
+
+    private static final int INDEX_GAP_LENGTH_INITIAL_SIZE = (Integer.MAX_VALUE >> 1);
+
+    /**
+     * Number of embedded token lists contained in join token list.
+     */
+    int tokenListCount; // 12 bytes (8-super + 4)
+
+    /**
+     * Total count of tokens contained in JoinTokenList.
+     */
+    int joinTokenCount; // 16 bytes
+
+    /**
+     * Last modCount for which the JoinTokenList was updated.
+     *
+     * JoinTokenList.updateStatus() may be used to update it.
+     */
+    int lastModCount; // 20 bytes
+
+    /**
+     * Index among the contained embedded token lists at which both index-related gaps are located.
+     */
+    int indexGapsIndex; // 24 bytes
+
+    /**
+     * Length of the index gap used for computation of token indexes in a JoinTokenList
+     * based on ETLs.
+     *
+     * Whether a raw index lies above the gap is determined by comparing it to the gap length:
+     * the initial gap length is so large that real indexes can never reach it
+     * (even after it gets decreased by added items).
+     */
+    int joinTokenIndexGapLength = INDEX_GAP_LENGTH_INITIAL_SIZE; // 28 bytes
+
+    /**
+     * Length of the index gap used for computation of an ETL's index in a JoinTokenList,
+     * which is useful for finding the start-token-list-index of the join token list.
+     *
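To make the two gap fields concrete, here is a minimal, self-contained sketch of the same index-gap technique; all names are hypothetical and not part of this patch. Values stored below the gap keep their real index, values above it store the index shifted by the gap length, so inserting a block of items costs only a constant-time adjustment of the two gap fields:

    /** Sketch of the index-gap trick used by JoinTokenListBase (illustrative names). */
    final class IndexGapDemo {
        static final int INITIAL_GAP_LENGTH = Integer.MAX_VALUE >> 1;

        int gapIndex;                       // logical position of the gap
        int gapLength = INITIAL_GAP_LENGTH; // shrinks as items get inserted

        int decode(int rawIndex) {          // same shape as tokenListIndex(rawTokenListIndex) below
            return (rawIndex < gapLength) ? rawIndex : rawIndex - gapLength;
        }

        int encode(int index) {             // inverse mapping used when storing an index
            return (index < gapIndex) ? index : index + gapLength;
        }

        void insertedItems(int count) {     // same shape as tokenListModNotify(tokenListCountDiff) below
            gapIndex += count;              // the gap stays above the inserted items
            gapLength -= count;             // raw values above the gap now decode `count` higher
        }

        void demo() {
            gapIndex = 2;                   // gap sits between items 1 and 2
            int raw = encode(5);            // item logically at index 5
            insertedItems(3);               // three items inserted at the gap
            assert decode(raw) == 8;        // the item shifted without the raw value being touched
        }
    }

The initial gap length of Integer.MAX_VALUE >> 1 is what makes the "above the gap" test a plain magnitude comparison in joinTokenIndex() and tokenListIndex().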
+     * Whether a raw index lies above the gap is determined by comparing it to the gap length:
+     * the initial gap length is so large that real indexes can never reach it
+     * (even after it gets decreased by added items).
+     */
+    int tokenListIndexGapLength = INDEX_GAP_LENGTH_INITIAL_SIZE; // 32 bytes
+
+    JoinTokenListBase(int tokenListCount, int lastModCount) {
+        this.tokenListCount = tokenListCount;
+        this.lastModCount = lastModCount;
+        // Move index gap to be at the end of all contained token lists
+        this.indexGapsIndex = tokenListCount;
+    }
+
+    int joinTokenIndex(int rawJoinTokenIndex) {
+        return (rawJoinTokenIndex < joinTokenIndexGapLength)
+                ? rawJoinTokenIndex
+                : rawJoinTokenIndex - joinTokenIndexGapLength;
+    }
+
+    int tokenListIndex(int rawTokenListIndex) {
+        return (rawTokenListIndex < tokenListIndexGapLength)
+                ? rawTokenListIndex
+                : rawTokenListIndex - tokenListIndexGapLength;
+    }
+
+    /**
+     * Move both gaps in sync so that ETL.JoinInfo in an ETL at "index" is above both gaps.
+     *
+     * @param tokenListList non-null TLL.
+     * @param tokenListStartIndex points to the first list belonging to a JTL.
+     * @param index index to which the gaps should be moved.
+     */
+
+    public void moveIndexGap(TokenListList tokenListList, int tokenListStartIndex, int index) {
+        int targetIndex = index; // Remember the target gap position before the loops modify index
+        if (index < indexGapsIndex) {
+            // Items in <index,indexGapsIndex) move from below the gap to above it
+            do {
+                EmbeddedJoinInfo joinInfo = tokenListList.get(tokenListStartIndex + index).joinInfo;
+                joinInfo.rawTokenListIndex += tokenListIndexGapLength;
+                joinInfo.rawJoinTokenIndex += joinTokenIndexGapLength;
+            } while (++index < indexGapsIndex);
+            indexGapsIndex = targetIndex;
+        } else if (index > indexGapsIndex) {
+            // Items in <indexGapsIndex,index) move from above the gap to below it
+            do {
+                index--;
+                EmbeddedJoinInfo joinInfo = tokenListList.get(tokenListStartIndex + index).joinInfo;
+                joinInfo.rawTokenListIndex -= tokenListIndexGapLength;
+                joinInfo.rawJoinTokenIndex -= joinTokenIndexGapLength;
+            } while (index > indexGapsIndex);
+            indexGapsIndex = targetIndex;
+        }
+    }
+
+    public void tokenListModNotify(int tokenListCountDiff) {
+        // Gap assumed to be above the last added token list
+        indexGapsIndex += tokenListCountDiff; // Move gap above the added lists
+        tokenListCount += tokenListCountDiff;
+        tokenListIndexGapLength -= tokenListCountDiff;
+    }
+
+    public int joinTokenCount() {
+        return joinTokenCount;
+    }
+
+    public void updateJoinTokenCount(int joinTokenCountDiff) {
+        joinTokenCount += joinTokenCountDiff;
+        joinTokenIndexGapLength -= joinTokenCountDiff;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder(70);
+        sb.append("tokenListCount=").append(tokenListCount);
+        sb.append(", tokenCount=").append(joinTokenCount);
+        sb.append(", lastModCount=").append(lastModCount);
+        sb.append(", indexGapsIndex=").append(indexGapsIndex);
+        return sb.toString();
+    }
+
+}
diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/LAState.java
--- a/lexer/src/org/netbeans/lib/lexer/LAState.java Wed May 28 13:50:31 2008 +0200
+++ b/lexer/src/org/netbeans/lib/lexer/LAState.java Wed May 28 14:48:54 2008 +0200
@@ -52,19 +52,10 @@
 
     private static final LAState EMPTY = new NoState(0);
 
-    private static final LAState INIT_STATE = new NoState(0);
-
     public static LAState empty() {
         return EMPTY;
    }
 
-    /**
-     * Special state for marking that an embedded token list was not inited yet.
-     */
-    public static LAState initState() {
-        return INIT_STATE;
-    }
-
     static int withExtraCapacity(int capacity) {
         return capacity * 3 / 2 + 4;
     }
@@ -105,6 +96,13 @@
         return capacity() - gapLength;
     }
 
+    /**
+     * Add a particular lookahead and state.
+ * + * @param lookahead + * @param state + * @return either same or a new LAState containing the given lookahead and state. + */ public final LAState add(int lookahead, Object state) { LAState ret; if (gapLength > 0) { // enough space @@ -154,6 +152,12 @@ protected abstract LAState upgrade(int capacity, Class laStateClass); + /** + * Whether an upgrade is necessary when the given laStateClass needs to be used. + * + * @param laStateClass non-null requested laStateClass + * @return true if upgrade is necessary. + */ protected abstract boolean isUpgrade(Class laStateClass); protected abstract Class addToGapStart(int lookahead, Object state); diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/LanguageOperation.java --- a/lexer/src/org/netbeans/lib/lexer/LanguageOperation.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/LanguageOperation.java Wed May 28 14:48:54 2008 +0200 @@ -109,8 +109,7 @@ if (!existingLanguagePaths.contains(lp)) { newLanguagePaths.add(lp); } - @SuppressWarnings("unchecked") - Language language = (Language)lp.innerLanguage(); + Language language = (Language)LexerUtilsConstants.innerLanguage(lp); if (!exploredLanguages.contains(language)) { exploredLanguages.add(language); Set ids = language.tokenIds(); @@ -162,6 +161,10 @@ WeakListeners.create(PropertyChangeListener.class, this, LanguageManager.getInstance())); } + public Language language() { + return language; + } + public synchronized TokenValidator tokenValidator(T id) { if (tokenValidators == null) { tokenValidators = allocateTokenValidatorArray(language.maxOrdinal() + 1); @@ -188,31 +191,10 @@ } FlyItem item = flyItems[id.ordinal()]; if (item == null) { - token = new TextToken(id, text); // create flyweight token - token.makeFlyweight(); - flyItems[id.ordinal()] = new FlyItem(token); - } else { // already a valid item - token = item.token(); - if (token.text() != text) { - token = item.token2(); - if (token == null || token.text() != text) { - token = item.token(); - if (!CharSequenceUtilities.textEquals(token.text(), text)) { - token = item.token2(); - if (token == null || !CharSequenceUtilities.textEquals(token.text(), text)) { - // Create new token - token = new TextToken(id, text); - token.makeFlyweight(); - } - item.pushToken(token); - } - } else { // found token2 - item.pushToken(token); - } - } + item = new FlyItem(id, text); + flyItems[id.ordinal()] = item; } - assert (token != null); // Should return non-null token - return token; + return item.flyToken(id, text); } public synchronized EmbeddingPresence embeddingPresence(T id) { @@ -322,27 +304,49 @@ return (TokenValidator[]) new TokenValidator[length]; } + private static final class FlyItem { - private TextToken token; + private TextToken token; // Most used (first candidate) - private TextToken token2; + private TextToken token2; // Second most used - public FlyItem(TextToken token) { - this.token = token; + FlyItem(T id, String text) { + newToken(id, text); + token2 = token; // Make both item non-null } - - public TextToken token() { + + TextToken flyToken(T id, String text) { + // First do a quick check for equality only in both items + if (token.text() != text) { + if (token2.text() == text) { + // Swap token and token2 => token will contain the right value + swap(); + } else { // token.text() != text && token2.text() != text + // Now deep-compare of text of both tokens + if (!CharSequenceUtilities.textEquals(token.text(), text)) { + if (!CharSequenceUtilities.textEquals(token2.text(), text)) { + token2 = token; + 
newToken(id, text); + } else { // swap + swap(); + } + } + } + } return token; } - public TextToken token2() { - return token2; + void newToken(T id, String text) { + // Create new token + token = new TextToken(id, text); + token.makeFlyweight(); } - public void pushToken(TextToken token) { - this.token2 = this.token; - this.token = token; + private void swap() { + TextToken tmp = token; + token = token2; + token2 = tmp; } } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/LexerInputOperation.java --- a/lexer/src/org/netbeans/lib/lexer/LexerInputOperation.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/LexerInputOperation.java Wed May 28 14:48:54 2008 +0200 @@ -41,21 +41,24 @@ package org.netbeans.lib.lexer; -import java.util.List; import java.util.Set; -import org.netbeans.api.lexer.InputAttributes; +import java.util.logging.Level; +import java.util.logging.Logger; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.PartType; import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.editor.util.GapList; -import org.netbeans.lib.lexer.token.ComplexToken; +import org.netbeans.lib.editor.util.CharSequenceUtilities; import org.netbeans.spi.lexer.Lexer; import org.netbeans.spi.lexer.LexerInput; import org.netbeans.lib.lexer.token.AbstractToken; -import org.netbeans.lib.lexer.token.ComplexToken; +import org.netbeans.lib.lexer.token.CustomTextToken; +import org.netbeans.lib.lexer.token.DefaultToken; +import org.netbeans.lib.lexer.token.PropertyToken; import org.netbeans.spi.lexer.LanguageHierarchy; import org.netbeans.spi.lexer.LexerRestartInfo; import org.netbeans.spi.lexer.TokenFactory; +import org.netbeans.spi.lexer.TokenPropertyProvider; /** * Implementation of the functionality related to lexer input. @@ -64,17 +67,25 @@ * @version 1.00 */ -public abstract class LexerInputOperation implements CharProvider { +public abstract class LexerInputOperation { + + // -J-Dorg.netbeans.lib.lexer.LexerInputOperation.level=FINE + static final Logger LOG = Logger.getLogger(LexerInputOperation.class.getName()); - /** Flag for additional correctness checks (may degrade performance). */ - private static final boolean testing = Boolean.getBoolean("netbeans.debug.lexer.test"); + protected final TokenList tokenList; /** - * Current reading index in the operation. - * At all times it must be >=0. + * Current reading index which usually corresponds to real offset. + *
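Returning briefly to the FlyItem rewrite in the LanguageOperation hunk above: it replaces the token()/token2()/pushToken() juggling with a two-slot most-recently-used cache per token id. A hedged sketch of that caching pattern, with hypothetical names and String standing in for TextToken:

    /** Two-slot MRU cache: a hit in slot two swaps the slots; a miss demotes slot one. */
    final class TwoSlotCache {
        private String first;  // most recently used value
        private String second; // runner-up

        TwoSlotCache(String initial) {
            first = initial;
            second = initial; // keep both slots non-null so lookups need no null checks
        }

        String get(String text) {
            if (!first.equals(text)) { // the real code compares by identity first, then by content
                if (second.equals(text)) {
                    String tmp = first; first = second; second = tmp; // promote the hit
                } else {
                    second = first;        // demote the current favorite
                    first = text.intern(); // stands in for creating a new flyweight token
                }
            }
            return first;
        }
    }

Keeping both slots non-null from the constructor onward is the same trick as the `token2 = token` line above: the lookup path never has to test for null.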
+ * It should be set to its initial value in the constructor by descendants. */ - private int readIndex; + protected int readOffset; + /** + * A value that designates a start of a token being currently recognized. + */ + protected int tokenStartOffset; + /** * Maximum index from which the char was fetched for current * (or previous) tokens recognition. @@ -82,330 +93,300 @@ * The index is updated lazily - only when EOF is reached * and when backup() is called. */ - private int lookaheadIndex; + private int lookaheadOffset; /** - * Active preprocessor or null if there is no preprocessor. + * Token length computed by assignTokenLength(). */ - private CharPreprocessorOperation preprocessorOperation; + protected int tokenLength; - /** - * Computed and cached token length. - */ - private int tokenLength; + protected Lexer lexer; - private final TokenList tokenList; - - private final boolean mutableInput; - - private final Lexer lexer; - - /** - * Start of the token being currently recognized. - */ - private int tokenStartIndex; + protected final LanguageOperation innerLanguageOperation; - private boolean lexerFinished; /** * How many flyweight tokens were created in a row. */ - private int flySequenceLength; + private int flyTokenSequenceLength; - private List preprocessErrorList; - - /** - * Total count of preprocessors used during lexing. - * It's used to determine whether extra preprocessed chars need to be used. - */ - protected int preprocessingLevelCount; - - private CharProvider.ExtraPreprocessedChars extraPreprocessedChars; - - private Language language; - public LexerInputOperation(TokenList tokenList, int tokenIndex, Object lexerRestartState) { this.tokenList = tokenList; - this.mutableInput = (tokenList.modCount() != -1); - // Determine flySequenceLength setting - while (--tokenIndex >= 0 && LexerUtilsConstants.token( - tokenList, tokenIndex).isFlyweight() - ) { - flySequenceLength++; + LanguagePath languagePath = tokenList.languagePath(); + this.innerLanguageOperation = LexerUtilsConstants.innerLanguageOperation(languagePath); + + // Determine flyTokenSequenceLength setting + while (--tokenIndex >= 0 && tokenList.tokenOrEmbedding(tokenIndex).token().isFlyweight()) { + flyTokenSequenceLength++; } - - LanguagePath languagePath = tokenList.languagePath(); - language = LexerUtilsConstants.innerLanguage(languagePath); - LanguageHierarchy languageHierarchy = LexerApiPackageAccessor.get().languageHierarchy(language); + + LanguageHierarchy languageHierarchy = LexerApiPackageAccessor.get().languageHierarchy( + LexerUtilsConstants.innerLanguage(languagePath)); TokenFactory tokenFactory = LexerSpiPackageAccessor.get().createTokenFactory(this); - - // Check whether character preprocessing is necessary -// CharPreprocessor p = LexerSpiPackageAccessor.get().createCharPreprocessor(languageHierarchy); -// if (p != null) { -// preprocessingLevelCount++; -// preprocessorOperation = new CharPreprocessorOperation( -// ((preprocessorOperation != null) -// ? (CharProvider)preprocessorOperation -// : this), -// p, -// this -// ); -// } - - LexerInput lexerInput = LexerSpiPackageAccessor.get().createLexerInput( - (preprocessorOperation != null) ? 
preprocessorOperation : this); + LexerInput lexerInput = LexerSpiPackageAccessor.get().createLexerInput(this); LexerRestartInfo info = LexerSpiPackageAccessor.get().createLexerRestartInfo( lexerInput, tokenFactory, lexerRestartState, - tokenList.languagePath(), inputAttributes()); + languagePath, tokenList.inputAttributes()); lexer = LexerSpiPackageAccessor.get().createLexer(languageHierarchy, info); } - public abstract int read(int index); - - public abstract char readExisting(int index); - - public abstract void approveToken(AbstractToken token); - - public Set skipTokenIds() { - return tokenList.skipTokenIds(); - } + public abstract int read(int offset); + + public abstract char readExisting(int offset); + + /** + * Fill appropriate data like token list and offset into a non-flyweight token. + *
+     * This method should also move over the token's characters by increasing
+     * the token's start offset and possibly other related variables.
+     *
+     * @param token non-null non-flyweight token.
+     */
+    protected abstract void fillTokenData(AbstractToken token);
 
     public final int read() {
-        int c = read(readIndex++);
+        int c = read(readOffset++);
         if (c == LexerInput.EOF) {
-            lookaheadIndex = readIndex; // count EOF char into lookahead
-            readIndex--; // readIndex must not include EOF
+            lookaheadOffset = readOffset; // count EOF char into lookahead
+            readOffset--; // readOffset must not include EOF
         }
         return c;
     }
 
-    public int deepRawLength(int length) {
-        // No preprocessing by default
-        return length;
+    public final int readLength() {
+        return readOffset - tokenStartOffset;
     }
 
-    public int deepRawLengthShift(int index) {
-        // No preprocessing by default
-        return index;
-    }
-
-    public final int readIndex() {
-        return readIndex;
+    public final char readExistingAtIndex(int index) {
+        return readExisting(tokenStartOffset + index);
     }
 
     public final void backup(int count) {
-        if (lookaheadIndex < readIndex) {
-            lookaheadIndex = readIndex;
+        if (lookaheadOffset < readOffset) {
+            lookaheadOffset = readOffset;
         }
-        readIndex -= count;
+        readOffset -= count;
     }
 
+    public final int lookahead() {
+        return (lookaheadOffset > readOffset) ? (lookaheadOffset - readOffset) : 0;
+    }
+
+    public AbstractToken nextToken() {
+        while (true) {
+            AbstractToken token = (AbstractToken)lexer.nextToken();
+            if (token == null) {
+                checkLexerInputFinished();
+                return null;
+            }
+            // Check that the id of the new token belongs to the language
+            Language language = innerLanguageOperation.language();
+            if (!isSkipToken(token) && !language.tokenIds().contains(token.id())) {
+                String msgPrefix = "Invalid TokenId=" + token.id()
+                        + " returned from lexer="
+                        + lexer + " for language=" + language + ":\n";
+                if (token.id().ordinal() > language.maxOrdinal()) {
+                    throw new IllegalStateException(msgPrefix
+                            + "Language.maxOrdinal()=" + language.maxOrdinal() + " < " + token.id().ordinal());
+                } else { // Ordinal ok but a different id with that ordinal is contained in the language
+                    throw new IllegalStateException(msgPrefix
+                            + "Language contains no or different tokenId with ordinal="
+                            + token.id().ordinal() + ": " + language.tokenId(token.id().ordinal()));
+                }
+            }
+            // Skip the token's chars
+            tokenStartOffset += tokenLength;
+            if (!isSkipToken(token))
+                return token;
+        } // Continue to fetch a non-skip token
+    }
 
     /**
-     * Get a distance between the index of the rightmost character already returned
-     * by previous {@link #read()} operations and the present read index.
-     *
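The read()/backup()/lookahead() trio above maintains three offsets whose interplay is subtle; the following self-contained sketch reproduces that bookkeeping over a plain String (hypothetical names, simplified from the patch):

    /** Sketch of the offset bookkeeping in LexerInputOperation (illustrative). */
    final class MiniLexerInput {
        static final int EOF = -1;
        private final String text;
        private int tokenStartOffset; // start of the token being recognized
        private int readOffset;       // next character to be read
        private int lookaheadOffset;  // rightmost offset ever reached

        MiniLexerInput(String text) { this.text = text; }

        int read() {
            readOffset++;
            int c = (readOffset - 1 < text.length()) ? text.charAt(readOffset - 1) : EOF;
            if (c == EOF) {
                lookaheadOffset = readOffset; // the EOF counts as one lookahead char
                readOffset--;                 // but is never part of readLength()
            }
            return c;
        }

        void backup(int count) {
            if (lookaheadOffset < readOffset) {
                lookaheadOffset = readOffset; // record the high-water mark first
            }
            readOffset -= count;
        }

        int lookahead() { // how far past readOffset the lexer has already peeked
            return (lookaheadOffset > readOffset) ? (lookaheadOffset - readOffset) : 0;
        }

        int readLength() { // characters consumed for the token in progress
            return readOffset - tokenStartOffset;
        }
    }

For example, reading "ab" through to EOF and then calling backup(1) leaves readLength() == 1 and lookahead() == 2: the unread 'b' plus the EOF.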
- * If there were no {@link #backup(int)} operation performed - * the lookahead will be zero except the case when EOF was already returned. - * - * @return >=0 number of characters between the rightmost reading index reached - * and the present read position. - *
- * The EOF (when reached by reading) is treated as a single character - * in lookahead. - *
- * If there is an active character preprocessor the returned value - * is a raw length of the lookahead. + * Used by token list updater after nextToken() to determine start offset of a token + * to be recognized next. Overriden for join token lists since join tokens + * may span multiple ETLs. + * + * @return start offset of a next token that would be recognized. */ - public final int lookahead() { - return (lookaheadIndex > readIndex) - ? ((preprocessorOperation != null) - ? preprocessorOperation.deepRawLength(lookaheadIndex - readIndex) - : (lookaheadIndex - readIndex)) - : 0; + public int lastTokenEndOffset() { + return tokenStartOffset; + } + + public AbstractToken getFlyweightToken(T id, String text) { + assert (text.length() <= readLength()); + // Compare each recognized char with the corresponding char in text + if (LOG.isLoggable(Level.FINE)) { + for (int i = 0; i < text.length(); i++) { + if (text.charAt(i) != readExisting(i)) { + throw new IllegalArgumentException("Flyweight text in " + // NOI18N + "TokenFactory.getFlyweightToken(" + id + ", \"" + // NOI18N + CharSequenceUtilities.debugText(text) + "\") " + // NOI18N + "differs from recognized text: '" + // NOI18N + CharSequenceUtilities.debugChar(readExisting(i)) + + "' != '" + CharSequenceUtilities.debugChar(text.charAt(i)) + // NOI18N + "' at index=" + i // NOI18N + ); + } + } + } + + assignTokenLength(text.length()); + AbstractToken token; + if ((token = checkSkipToken(id)) == null) { + if (isFlyTokenAllowed()) { + token = innerLanguageOperation.getFlyweightToken(id, text); + flyTokenSequenceLength++; + } else { // Create regular token + token = createDefaultTokenInstance(id); + fillTokenData(token); + flyTokenSequenceLength = 0; + } + } + return token; } - public final int tokenLength() { + private AbstractToken checkSkipToken(T id) { + if (isSkipTokenId(id)) { + // Prevent fly token occurrence after skip token to have a valid offset + flyTokenSequenceLength = LexerUtilsConstants.MAX_FLY_SEQUENCE_LENGTH; + return skipToken(); + } + return null; + } + + public AbstractToken createToken(T id, int length) { + assignTokenLength(length); + AbstractToken token; + if ((token = checkSkipToken(id)) == null) { + token = createDefaultTokenInstance(id); + fillTokenData(token); + flyTokenSequenceLength = 0; + } + return token; + } + + protected AbstractToken createDefaultTokenInstance(T id) { + return new DefaultToken(id, tokenLength); + } + + public AbstractToken createToken(T id, int length, PartType partType) { + if (partType == null) + throw new IllegalArgumentException("partType must be non-null"); + if (partType == PartType.COMPLETE) + return createToken(id, length); + + return createPropertyToken(id, length, null, partType); + } + + public AbstractToken createPropertyToken(T id, int length, + TokenPropertyProvider propertyProvider, PartType partType) { + if (partType == null) + partType = PartType.COMPLETE; + + assignTokenLength(length); + AbstractToken token; + if ((token = checkSkipToken(id)) == null) { + token = createPropertyTokenInstance(id, propertyProvider, partType); + fillTokenData(token); + flyTokenSequenceLength = 0; + } + return token; + } + + protected AbstractToken createPropertyTokenInstance(T id, + TokenPropertyProvider propertyProvider, PartType partType) { + return new PropertyToken(id, tokenLength, propertyProvider, partType); + } + + public AbstractToken createCustomTextToken(T id, int length, CharSequence customText) { + assignTokenLength(length); + AbstractToken token; + if ((token = checkSkipToken(id)) == 
null) { + token = createCustomTextTokenInstance(id, customText); + fillTokenData(token); + flyTokenSequenceLength = 0; + } + return token; + } + + protected AbstractToken createCustomTextTokenInstance(T id, CharSequence customText) { + return new CustomTextToken(id, customText, tokenLength); + } + + public boolean isSkipTokenId(T id) { + Set skipTokenIds = tokenList.skipTokenIds(); + return (skipTokenIds != null && skipTokenIds.contains(id)); + } + + protected final int tokenLength() { return tokenLength; } - - public void tokenRecognized(int tokenLength) { - if (tokenLength > readIndex()) { + + public void assignTokenLength(int tokenLength) { + if (tokenLength > readLength()) { throw new IndexOutOfBoundsException("tokenLength=" + tokenLength // NOI18N - + " >" + readIndex()); + + " >" + readLength()); } this.tokenLength = tokenLength; - } - - public void tokenApproved() { - tokenStartIndex += tokenLength; - readIndex -= tokenLength; - lookaheadIndex -= tokenLength; - } - - protected final TokenList tokenList() { - return tokenList; - } - - protected final int tokenStartIndex() { - return tokenStartIndex; - } - - public final void setTokenStartIndex(int tokenStartIndex) { - this.tokenStartIndex = tokenStartIndex; - } - - protected final CharPreprocessorOperation preprocessor() { - return preprocessorOperation; - } - - public final boolean isMutableInput() { - return mutableInput; - } - - public final boolean isStoreLookaheadAndState() { - return isMutableInput() || testing; - } - - public AbstractToken nextToken() { - assert (!lexerFinished); - while (true) { - @SuppressWarnings("unchecked") - AbstractToken token = (AbstractToken)lexer().nextToken(); - if (token == null) { - LexerUtilsConstants.checkLexerInputFinished( - (preprocessorOperation != null) ? (CharProvider)preprocessorOperation : this, this); - lexerFinished = true; - return null; - } else { - // Check that the id belongs to the language - if (token != TokenFactory.SKIP_TOKEN && !language.tokenIds().contains(token.id())) { - String msgPrefix = "Invalid TokenId=" + token.id() - + " returned from lexer=" - + lexer() + " for language=" + language + ":\n"; - if (token.id().ordinal() > language.maxOrdinal()) { - throw new IllegalStateException(msgPrefix + - "Language.maxOrdinal()=" + language.maxOrdinal() + " < " + token.id().ordinal()); - } else { // Ordinal ok but different id with that ordinal contained in language - throw new IllegalStateException(msgPrefix + - "Language contains no or different tokenId with ordinal=" - + token.id().ordinal() + ": " + language.tokenId(token.id().ordinal())); - } - } - approveToken(token); - } - if (token == TokenFactory.SKIP_TOKEN) - continue; // Fetch next token - return token; - } - } - - /** - * Notification that the token was recognized. - * @param tokenLength length of the recognized token. - * @param skip whether the token should be skipped - * @return true if the token holding preprocessed text should be created. - * If skip is true then false is returned. - */ - public final boolean tokenRecognized(int tokenLength, boolean skip) { - if (preprocessorOperation != null) { - preprocessorOperation.tokenRecognized(tokenLength); - } else { // no preprocessor - tokenRecognized(tokenLength); - } - - // If the token is not skipped check whether preprocessed token - // should be created instead of the regular token. 
- if (!skip && tokenLength != this.tokenLength - || (preprocessErrorList != null - && preprocessErrorList.get(0).index() < this.tokenLength) - ) { - if (extraPreprocessedChars == null && preprocessingLevelCount > 1) { - // For more than one preprocessing level need to handle - // extra preprocessed chars before and after the main ones - // on the parent levels. - extraPreprocessedChars = new CharProvider.ExtraPreprocessedChars(); - } - return true; - } - return false; - } - - public void notifyPreprocessorError(CharPreprocessorError error) { - if (preprocessErrorList == null) { - preprocessErrorList = new GapList(); - } - preprocessErrorList.add(error); - } - -// public final void initPreprocessedToken(AbstractToken token) { -// CharPreprocessorError error = null; -// if (preprocessErrorList != null && preprocessErrorList.size() > 0) { -// for (int i = preprocessErrorList.size() - 1; i >= 0; i--) { -// error = preprocessErrorList.get(i); -// if (error.index() < tokenLength) { -// preprocessErrorList.remove(i); -// } else {// Above errors for this token -// // Relocate - subtract token length -// error.updateIndex(-tokenLength); -// error = null; -// } -// } -// } -// -// PreprocessedTextStorage storage = preprocessorOperation.createPreprocessedTextStorage( -// token.text(), extraPreprocessedChars); -// -// if (token.getClass() == ComplexToken.class) { -// ((ComplexToken)token).initPrep(storage, error); -// } else { -// ((PreprocessedTextToken)token).initPrep(storage, error); -// } -// } - - public void collectExtraPreprocessedChars(CharProvider.ExtraPreprocessedChars epc, - int prepStartIndex, int prepEndIndex, int topPrepEndIndex) { - // No extra preprocessed characters - } - - public final LanguageOperation languageOperation() { - return LexerUtilsConstants.innerLanguageOperation(tokenList.languagePath()); } public final Object lexerState() { return lexer.state(); } - public final boolean isFlyTokenAllowed() { - return (flySequenceLength < LexerUtilsConstants.MAX_FLY_SEQUENCE_LENGTH); + protected boolean isFlyTokenAllowed() { + return (flyTokenSequenceLength < LexerUtilsConstants.MAX_FLY_SEQUENCE_LENGTH); } - protected final void flyTokenAdded() { - flySequenceLength++; + public final boolean isSkipToken(AbstractToken token) { + return (token == LexerUtilsConstants.SKIP_TOKEN); } - protected final void preventFlyToken() { - flySequenceLength = LexerUtilsConstants.MAX_FLY_SEQUENCE_LENGTH; + @SuppressWarnings("unchecked") + public final AbstractToken skipToken() { + return (AbstractToken)LexerUtilsConstants.SKIP_TOKEN; + } + + /** + * Release the underlying lexer. This method can be called multiple times. + */ + public final void release() { + if (lexer != null) { + lexer.release(); + lexer = null; + } } - protected final void clearFlySequence() { - flySequenceLength = 0; + /** + * Check that there are no more characters to be read from the given + * lexer input operation. + */ + private void checkLexerInputFinished() { + if (read() != LexerInput.EOF) { + throw new IllegalStateException( + "Lexer " + lexer + // NOI18N + " returned null token" + // NOI18N + " but EOF was not read from lexer input yet." + // NOI18N + " Fix the lexer."// NOI18N + ); + } + if (readLength() > 0) { + throw new IllegalStateException( + "Lexer " + lexer + // NOI18N + " returned null token but lexerInput.readLength()=" + // NOI18N + readLength() + + " - these characters need to be tokenized." + // NOI18N + " Fix the lexer." 
// NOI18N + ); + } } - - protected final boolean isSkipToken(AbstractToken token) { - return (token == TokenFactory.SKIP_TOKEN); + + @Override + public String toString() { + return "tokenStartOffset=" + tokenStartOffset + ", readOffset=" + readOffset + // NOI18N + ", lookaheadOffset=" + lookaheadOffset; } - - public final Lexer lexer() { - return lexer; - } - - public final InputAttributes inputAttributes() { - return tokenList.inputAttributes(); - } - - public final void release() { - lexer.release(); - } - + } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/LexerSpiPackageAccessor.java --- a/lexer/src/org/netbeans/lib/lexer/LexerSpiPackageAccessor.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/LexerSpiPackageAccessor.java Wed May 28 14:48:54 2008 +0200 @@ -107,7 +107,7 @@ public abstract boolean isRetainTokenText(LanguageHierarchy languageHierarchy, T id); - public abstract LexerInput createLexerInput(CharProvider charProvider); + public abstract LexerInput createLexerInput(LexerInputOperation operation); public abstract Language language(MutableTextInput mti); diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/LexerUtilsConstants.java --- a/lexer/src/org/netbeans/lib/lexer/LexerUtilsConstants.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/LexerUtilsConstants.java Wed May 28 14:48:54 2008 +0200 @@ -45,15 +45,14 @@ import org.netbeans.api.lexer.InputAttributes; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenHierarchy; import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.editor.util.ArrayUtilities; -import org.netbeans.lib.lexer.inc.FilterSnapshotTokenList; import org.netbeans.lib.lexer.inc.SnapshotTokenList; +import org.netbeans.lib.lexer.inc.TokenHierarchyEventInfo; import org.netbeans.spi.lexer.LanguageHierarchy; -import org.netbeans.spi.lexer.LexerInput; import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.token.TextToken; import org.netbeans.spi.lexer.LanguageEmbedding; /** @@ -75,54 +74,62 @@ public static final int MAX_FLY_SEQUENCE_LENGTH = 5; /** - * Minimum number of characters that will be lexed - * at once in a mutable input setup. - *
- * The created tokens will be notified in one token change event. - *
- * This should roughly cover a single page with text - * (so that an initial page of text is lexed at once) - * but it's not strictly necessary. + * Token list's modCount for the case when the source input is unmodifiable. */ - public static final int MIN_LEXED_AREA_LENGTH = 4096; + public static final int MOD_COUNT_IMMUTABLE_INPUT = -1; /** - * Fraction of the mutable input size that will be lexed at once. - *
- * This should avoid notifying of token creations too many times - * for large inputs. + * ModCount when the particular token list was removed from the token hierarchy. */ - public static final int LEXED_AREA_INPUT_SIZE_FRACTION = 10; + public static final int MOD_COUNT_REMOVED = -2; /** - * Marker value to recognize an uninitialized state variable. + * Maximum token length that has the TokenLength objects cached by TokenLength.CACHE. */ - public static final Object INVALID_STATE = new Object(); + public static final int MAX_CACHED_TOKEN_LENGTH = 200; /** - * Check that there are no more characters to be read from the given - * lexer input operation. + * Threshold (used by TokenLength) above which the DefaultToken implementations will + * start to cache the Token.text().toString() result in itself. */ - public static void checkLexerInputFinished(CharProvider input, LexerInputOperation operation) { - if (input.read() != LexerInput.EOF) { - throw new IllegalStateException( - "Lexer " + operation.lexer() + // NOI18N - " returned null token" + // NOI18N - " but EOF was not read from lexer input yet." + // NOI18N - " Fix the lexer."// NOI18N - ); - } - if (input.readIndex() > 0) { - throw new IllegalStateException( - "Lexer " + operation.lexer() + // NOI18N - " returned null token but lexerInput.readLength()=" + // NOI18N - input.readIndex() + - " - these characters need to be tokenized." + // NOI18N - " Fix the lexer." // NOI18N - ); - } + public static final short CACHE_TOKEN_TO_STRING_THRESHOLD = 900; + + /** + * Threshold similar to TOKEN_TEXT_STRING_THRESHOLD but for a case when a root token list's text + * is a String instance. In that case a String.substring(start, end) will be used + * which is considerably cheaper than a regular case because the character data + * will be shared with the root text and there will be no character copying. + */ + public static final short INPUT_TEXT_STRING_THRESHOLD = 300; + + /** + * Used by TokenLength as a measure of a String instance production. + */ + public static final short TOKEN_LENGTH_STRING_CREATION_FACTOR = 50; + + /** + * Initial size of a buffer for copying a text of a Reader. + */ + public static final int READER_TEXT_BUFFER_SIZE = 4096; + + static { + // Require the following to only use THRESHOLD in certain checks + assert (CACHE_TOKEN_TO_STRING_THRESHOLD >= INPUT_TEXT_STRING_THRESHOLD); } + + public static final AbstractToken SKIP_TOKEN + = new TextToken( + new TokenIdImpl("skip-token-id; special id of TokenFactory.SKIP_TOKEN; " + // NOI18N + " It should never be part of token sequence", 0, null), // NOI18N + "" // empty skip token text NOI18N + ); + /** + * Initial embedded token list's modCount prior it was synced + * with the root token list's modCount. + */ + public static final int MOD_COUNT_EMBEDDED_INITIAL = -3; + public static void tokenLengthZeroOrNegative(int tokenLength) { if (tokenLength == 0) { throw new IllegalArgumentException( @@ -195,7 +202,7 @@ * and if no embedding is found then the LanguageProvider.findLanguageEmbedding(). */ public static LanguageEmbedding - findEmbedding(LanguageHierarchy languageHierarchy, Token token, + findEmbedding(LanguageHierarchy languageHierarchy, AbstractToken token, LanguagePath languagePath, InputAttributes inputAttributes) { LanguageEmbedding embedding = LexerSpiPackageAccessor.get().embedding( @@ -210,25 +217,6 @@ return embedding; } - /** - * Returns token from the given object which is either the token - * or an embedding container. - *
-     * The method casts the resulting token to the generic type requested by the caller.
-     */
-    public static AbstractToken token(Object tokenOrEmbeddingContainer) {
-        @SuppressWarnings("unchecked")
-        AbstractToken token = (AbstractToken)
-                ((tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class)
-                ? ((EmbeddingContainer)tokenOrEmbeddingContainer).token()
-                : (AbstractToken)tokenOrEmbeddingContainer);
-        return token;
-    }
-
-    public static AbstractToken token(TokenList tokenList, int index) {
-        return token(tokenList.tokenOrEmbeddingContainer(index));
-    }
-
     public static int maxLanguagePathSize(Set paths) {
         int maxPathSize = 0;
         for (LanguagePath lp : paths) {
@@ -236,27 +224,155 @@
         }
         return maxPathSize;
     }
+
+    /**
+     * Get index of the token that "contains" the given offset.
+     * If the offset is beyond the existing tokens the method asks
+     * for next tokens by tokenList.tokenOrEmbedding().
+     *
+     * @param offset offset for which the token index should be found.
+     * @return array of two items where [0] is the token's index and [1] is its start offset.
+     *
+     * If offset >= last-token-end-offset then [0] contains the token count and
+     * [1] contains the last-token-end-offset.
+     *
+ * [0] may contain -1 to indicate that there are no tokens in the token list + * ([1] then contains zero). + */ + public static int[] tokenIndexLazyTokenCreation(TokenList tokenList, int offset) { + // Token count in the list may change as possibly other threads + // keep asking for tokens. Root token list impls create tokens lazily + // when asked by clients. + // The intent is to not force creation of all token (because of using a binary search) + // so first a last token is checked whether it covers the requested offset. + int tokenCount = tokenList.tokenCountCurrent(); // presently created token count + if (tokenCount == 0) { // no tokens yet -> attempt to create at least one + if (tokenList.tokenOrEmbedding(0) == null) { // really no tokens at all + return new int[] { -1, 0 }; + } + // Re-get the present token count (could be created a chunk of tokens at once) + tokenCount = tokenList.tokenCountCurrent(); + } + + // tokenCount surely >0 + int prevTokenOffset = tokenList.tokenOffsetByIndex(tokenCount - 1); + if (offset > prevTokenOffset) { // may need to create further tokens if they do not exist + // Force token list to create subsequent tokens + // Cannot subtract offset by each token's length because + // there may be gaps between tokens due to token id filter use. + int tokenLength = tokenList.tokenOrEmbedding(tokenCount - 1).token().length(); + while (offset >= prevTokenOffset + tokenLength) { // above present token + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(tokenCount); + if (tokenOrEmbedding != null) { + AbstractToken t = tokenOrEmbedding.token(); + if (t.isFlyweight()) { // need to use previous tokenLength + prevTokenOffset += tokenLength; + } else { // non-flyweight token - retrieve offset + prevTokenOffset = tokenList.tokenOffsetByIndex(tokenCount); + } + tokenLength = t.length(); + tokenCount++; + + } else { // no more tokens => position behind last token + return new int[] { tokenCount, prevTokenOffset + tokenLength }; + } + } + return new int[] { tokenCount - 1, prevTokenOffset }; + } + // Now do a regular binary search + return tokenIndexBinSearch(tokenList, offset, tokenCount); + } + + /** + * Get index of the token that "contains" the given offset by using binary search + * in existing tokens. + * + * @param offset offset for which the token index should be found. + * @return array of two items where the [0] is token's index and [1] is its offset. + *
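Both search methods share the {index, startOffset} return convention, including the rule that an offset falling into a gap between tokens maps to the lower token. A standalone sketch of that contract over plain arrays (hypothetical names; the real code works against a TokenList, of course):

    /** Sketch of the {index, startOffset} binary-search contract (illustrative). */
    final class TokenIndexDemo {
        /** {index, startOffset} of the token containing offset, or {-1, 0} without tokens. */
        static int[] tokenIndex(int[] starts, int[] lengths, int offset) {
            if (starts.length == 0) {
                return new int[] { -1, 0 };
            }
            int low = 0;
            int high = starts.length - 1;
            while (low <= high) {
                int mid = (low + high) >>> 1;
                if (starts[mid] < offset) {
                    low = mid + 1;
                } else if (starts[mid] > offset) {
                    high = mid - 1;
                } else {
                    return new int[] { mid, starts[mid] }; // exactly at a token start
                }
            }
            if (high < 0) { // before the first token
                return new int[] { 0, starts[0] };
            }
            int end = starts[high] + lengths[high];
            if (high == starts.length - 1 && offset >= end) { // beyond the last token
                return new int[] { starts.length, end };
            }
            return new int[] { high, starts[high] }; // inside token `high` or in the gap behind it
        }

        public static void main(String[] args) {
            int[] starts = { 0, 5, 12 };  // token 1 ends at 8, so [8,12) is a gap
            int[] lengths = { 5, 3, 4 };
            System.out.println(java.util.Arrays.toString(tokenIndex(starts, lengths, 9)));  // [1, 5]
            System.out.println(java.util.Arrays.toString(tokenIndex(starts, lengths, 20))); // [3, 16]
        }
    }

Note the `(low + high) >>> 1` midpoint, matching the overflow-safe form used by tokenIndexBinSearch() in this hunk.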
+     * If offset >= last-token-end-offset then [0] contains the token count and
+     * [1] contains the last-token-end-offset.
+     *
+ * [0] may contain -1 to indicate that there are no tokens in the token list + * ([1] then contains zero). + */ + public static int[] tokenIndexBinSearch(TokenList tokenList, int offset, int tokenCount) { + // The offset is within the currently recognized tokens + // Use binary search + int low = 0; + int high = tokenCount - 1; + int mid = -1; + int midStartOffset = -1; + while (low <= high) { + mid = (low + high) >>> 1; + midStartOffset = tokenList.tokenOffsetByIndex(mid); + + if (midStartOffset < offset) { + low = mid + 1; + } else if (midStartOffset > offset) { + high = mid - 1; + } else { + // Token starting exactly at offset found + return new int[] { mid, midStartOffset}; // right at the token begining + } + } + + // Not found exactly and high + 1 == low => high < low + // BTW there may be gaps between tokens; if offset is in gap then position to lower token + if (high >= 0) { // could be -1 + if (low == tokenCount) { // Could be beyond end of last token + AbstractToken t = tokenList.tokenOrEmbedding(high).token(); + // Use current midStartOffset + if (offset >= midStartOffset + t.length()) { // beyond end of last token + // Offset in the gap above the "high" token + high++; + midStartOffset += t.length(); + } else if (mid != high) { + midStartOffset = tokenList.tokenOffsetByIndex(high); + } + } else if (mid != high) { + midStartOffset = tokenList.tokenOffsetByIndex(high); + } + } else { // high == -1 => mid == 0 + if (tokenCount == 0) { // Need to return -1 + return new int[] { -1, 0 }; + } + high = 0; + // Use current midStartOffset + } + return new int[] { high, midStartOffset }; + } + + public static int updatedStartOffset(EmbeddedTokenList etl, TokenHierarchyEventInfo eventInfo) { + etl.embeddingContainer().updateStatusUnsync(); + int startOffset = etl.startOffset(); + return (etl.isRemoved() && startOffset > eventInfo.modOffset()) + ? Math.max(startOffset - eventInfo.removedLength(), eventInfo.modOffset()) + : startOffset; + } public static StringBuilder appendTokenList(StringBuilder sb, TokenList tokenList) { - return appendTokenList(sb, tokenList, -1, 0, Integer.MAX_VALUE, true, 0); + return appendTokenList(sb, tokenList, -1, 0, Integer.MAX_VALUE, true, 0, true); } public static StringBuilder appendTokenListIndented( StringBuilder sb, TokenList tokenList, int indent ) { - return appendTokenList(sb, tokenList, -1, 0, Integer.MAX_VALUE, false, indent); + return appendTokenList(sb, tokenList, -1, 0, Integer.MAX_VALUE, false, indent, true); } public static StringBuilder appendTokenList(StringBuilder sb, - TokenList tokenList, int currentIndex, int startIndex, int endIndex, boolean appendEmbedded, int indent) { + TokenList tokenList, int currentIndex, int startIndex, int endIndex, + boolean appendEmbedded, int indent, boolean dumpTokenText + ) { if (sb == null) { - sb = new StringBuilder(); + sb = new StringBuilder(200); } TokenHierarchy tokenHierarchy; if (tokenList instanceof SnapshotTokenList) { - tokenHierarchy = ((SnapshotTokenList)tokenList).snapshot().tokenHierarchy(); + tokenHierarchy = ((SnapshotTokenList)tokenList).snapshot().tokenHierarchy(); } else { - tokenHierarchy = null; + tokenHierarchy = null; } endIndex = Math.min(tokenList.tokenCountCurrent(), endIndex); @@ -266,7 +382,7 @@ sb.append((i == currentIndex) ? 
'*' : 'T'); ArrayUtilities.appendBracketedIndex(sb, i, digitCount); appendTokenInfo(sb, tokenList, i, tokenHierarchy, - appendEmbedded, indent); + appendEmbedded, indent, dumpTokenText); sb.append('\n'); } return sb; @@ -277,96 +393,31 @@ || (state1 != null && state1.equals(state2)); } - /** - * Get end state of the given token list that may be used for relexing - * of the next section. - *
- * If the section is empty but it does not join the sections then null state - * is returned. - * - * @param tokenList non-null token list. - * @return end state or {@link #INVALID_STATE} if previous token list must be queried. - */ - public static Object endState(EmbeddedTokenList tokenList) { - int tokenCount = tokenList.tokenCount(); - return (tokenCount > 0) - ? tokenList.state(tokenList.tokenCount() - 1) - : tokenList.embedding().joinSections() ? INVALID_STATE : null; - } - - /** - * Get end state of the given token list that may be used for relexing - * of the next section. - *
- * If the section is empty but it does not join the sections then null state - * is returned. - * - * @param tokenList non-null token list. - * @param lastEndState current state that will be overriden in case this section - * is not empty while joining the sections. - * @return end state or {@link #INVALID_STATE} if previous token list must be queried. - */ - public static Object endState(EmbeddedTokenList tokenList, Object lastEndState) { - int tokenCount = tokenList.tokenCount(); - return (tokenCount > 0) - ? tokenList.state(tokenList.tokenCount() - 1) - : tokenList.embedding().joinSections() ? lastEndState : null; - } - public static String idToString(TokenId id) { return id.name() + '[' + id.ordinal() + ']'; // NOI18N; } - public static TokenList embeddedTokenList( - TokenList tokenList, int tokenIndex, Language embeddedLanguage) { - TokenList embeddedTokenList - = EmbeddingContainer.embeddedTokenList(tokenList, tokenIndex, embeddedLanguage); - if (embeddedTokenList != null) { - ((EmbeddedTokenList)embeddedTokenList).embeddingContainer().updateStatus(); - TokenList tl = tokenList; - if (tokenList.getClass() == SubSequenceTokenList.class) { - tl = ((SubSequenceTokenList)tokenList).delegate(); - } - - if (tl.getClass() == FilterSnapshotTokenList.class) { - embeddedTokenList = new FilterSnapshotTokenList(embeddedTokenList, - ((FilterSnapshotTokenList)tl).tokenOffsetDiff()); - - } else if (tl.getClass() == SnapshotTokenList.class) { - Token token = token(tokenList, tokenIndex); - embeddedTokenList = new FilterSnapshotTokenList(embeddedTokenList, - tokenList.tokenOffset(tokenIndex) - token.offset(null)); - } - return embeddedTokenList; - } - return null; - } - - public static void appendTokenInfo(StringBuilder sb, TokenList tokenList, int index, - TokenHierarchy tokenHierarchy, boolean appendEmbedded, int indent + public static void appendTokenInfo(StringBuilder sb, + TokenList tokenList, int index, + TokenHierarchy tokenHierarchy, boolean appendEmbedded, int indent, + boolean dumpTokenText ) { - appendTokenInfo(sb, tokenList.tokenOrEmbeddingContainer(index), + appendTokenInfo(sb, tokenList.tokenOrEmbedding(index), tokenList.lookahead(index), tokenList.state(index), - tokenHierarchy, appendEmbedded, indent); + tokenHierarchy, appendEmbedded, indent, dumpTokenText); } - public static void appendTokenInfo(StringBuilder sb, Object tokenOrEmbeddingContainer, - int lookahead, Object state, - TokenHierarchy tokenHierarchy, boolean appendEmbedded, int indent + public static void appendTokenInfo(StringBuilder sb, + TokenOrEmbedding tokenOrEmbedding, int lookahead, Object state, + TokenHierarchy tokenHierarchy, boolean appendEmbedded, int indent, + boolean dumpTokenText ) { - if (tokenOrEmbeddingContainer == null) { + if (tokenOrEmbedding == null) { sb.append(""); } else { // regular token - Token token; - EmbeddingContainer ec; - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - ec = (EmbeddingContainer)tokenOrEmbeddingContainer; - token = ec.token(); - } else { - ec = null; - token = (Token)tokenOrEmbeddingContainer; - } - sb.append(((AbstractToken)token).dumpInfo(tokenHierarchy)); + EmbeddingContainer ec = tokenOrEmbedding.embedding(); + AbstractToken token = tokenOrEmbedding.token(); + token.dumpInfo(sb, tokenHierarchy, dumpTokenText, indent); appendLAState(sb, lookahead, state); sb.append(", "); appendIdentityHashCode(sb, token); @@ -384,7 +435,7 @@ ArrayUtilities.appendSpaces(sb, indent); sb.append("Embedding[").append(index).append("]: 
\"").append(etl.languagePath().mimePath()).append("\"\n"); if (appendEmbedded) { - appendTokenList(sb, etl, -1, 0, Integer.MAX_VALUE, appendEmbedded, indent); + appendTokenList(sb, etl, -1, 0, Integer.MAX_VALUE, appendEmbedded, indent, true); } etl = etl.nextEmbeddedTokenList(); index++; @@ -413,6 +464,129 @@ } } + public static String checkConsistencyTokenList(TokenList tokenList, boolean checkEmbedded) { + return checkConsistencyTokenList(tokenList, checkEmbedded, ArrayUtilities.emptyIntArray(), tokenList.startOffset()); + } + + private static String checkConsistencyTokenList(TokenList tokenList, boolean checkEmbedded, + int[] parentIndexes, int firstTokenOffset) { + int tokenCountCurrent = tokenList.tokenCountCurrent(); + int[] indexes = ArrayUtilities.intArray(parentIndexes, parentIndexes.length + 1); + boolean continuous = tokenList.isContinuous(); + int lastOffset = firstTokenOffset; + for (int i = 0; i < tokenCountCurrent; i++) { + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(i); + if (tokenOrEmbedding == null) { + return dumpContext("Null token", tokenList, i, parentIndexes); // NOI18N + } + AbstractToken token = tokenOrEmbedding.token(); + if (token.isRemoved()) { + return dumpContext("Token is removed", tokenList, i, parentIndexes); + } + // Check whether tokenList.startOffset() corresponds to the start of first token + if (i == 0 && continuous && tokenCountCurrent > 0 && !token.isFlyweight()) { + if (token.offset(null) != tokenList.startOffset()) { + return dumpContext("firstToken.offset()=" + token.offset(null) + + " != tokenList.startOffset()=" + tokenList.startOffset(), + tokenList, i, parentIndexes); + } + } + if (!token.isFlyweight() && token.tokenList() != tokenList && !(tokenList instanceof JoinTokenList)) { + return dumpContext("Invalid token.tokenList()=" + token.tokenList(), + tokenList, i, parentIndexes); + } + if (token.text() == null) { + return dumpContext("Null token.text()=" + token.tokenList(), + tokenList, i, parentIndexes); + } + int offset = (token.isFlyweight()) ? 
lastOffset : token.offset(null); + if (offset < 0) { + return dumpContext("Token offset=" + offset + " < 0", tokenList, i, parentIndexes); // NOI18N + } + if (offset < lastOffset) { + return dumpContext("Token offset=" + offset + " < lastOffset=" + lastOffset, + tokenList, i, parentIndexes); + } + if (offset > lastOffset && continuous) { + return dumpContext("Gap between tokens; offset=" + offset + ", lastOffset=" + lastOffset, + tokenList, i, parentIndexes); + } + lastOffset = offset + token.length(); + EmbeddingContainer ec = tokenOrEmbedding.embedding(); + if (ec != null && checkEmbedded) { + EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); + while (etl != null) { + String error = checkConsistencyTokenList(etl, checkEmbedded, indexes, + offset + etl.embedding().startSkipLength()); + if (error != null) + return error; + etl = etl.nextEmbeddedTokenList(); + } + } + } + return null; + } + + private static String dumpContext(String msg, TokenList tokenList, int index, int[] parentIndexes) { + StringBuilder sb = new StringBuilder(); + sb.append(msg); + sb.append(" at index="); // NOI18N + sb.append(index); + sb.append(" of tokens of language "); // NOI18N + sb.append(tokenList.languagePath().innerLanguage().mimeType()); + sb.append('\n'); + LexerUtilsConstants.appendTokenList(sb, tokenList, index, index - 2, index + 3, false, 0, true); + sb.append("\nParents:\n"); // NOI18N + sb.append(tracePath(parentIndexes, tokenList)); + return sb.toString(); + } + + public static String findTokenContext(AbstractToken token) { + return findTokenContext(token, token.tokenList().rootTokenList(), ArrayUtilities.emptyIntArray()); + } + + private static String findTokenContext(AbstractToken token, TokenList tokenList, int[] parentIndexes) { + int tokenCountCurrent = tokenList.tokenCountCurrent(); + int[] indexes = ArrayUtilities.intArray(parentIndexes, parentIndexes.length + 1); + for (int i = 0; i < tokenCountCurrent; i++) { + TokenOrEmbedding tokenOrEmbedding = tokenList.tokenOrEmbedding(i); + if (tokenOrEmbedding == null) { + continue; + } + EmbeddingContainer ec = tokenOrEmbedding.embedding(); + if (ec != null) { + if (ec.token() == token) { + return dumpContext("Token found.", tokenList, i, indexes); + } + EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); + while (etl != null) { + String context = findTokenContext(token, etl, indexes); + if (context != null) + return context; + etl = etl.nextEmbeddedTokenList(); + } + + } else if (tokenOrEmbedding == token) { + return dumpContext("Token found.", tokenList, i, indexes); + } + } + return null; + } + + private static String tracePath(int[] indexes, TokenList tokenList) { + StringBuilder sb = new StringBuilder(); + LanguagePath languagePath = tokenList.languagePath(); + TokenList rootTokenList = tokenList.rootTokenList(); + tokenList = rootTokenList; + for (int i = 0; i < indexes.length; i++) { + appendTokenInfo(sb, tokenList, i, rootTokenList.tokenHierarchyOperation().tokenHierarchy(), false, 0, true); + // Assign language to variable to get rid of javac bug for incremental compilation on 1.5 + Language language = languagePath.language(i); + tokenList = EmbeddingContainer.embeddedTokenList(tokenList, indexes[i], language, true); + } + return sb.toString(); + } + private LexerUtilsConstants() { // no instances } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/PreprocessedTextLexerInputOperation.java --- a/lexer/src/org/netbeans/lib/lexer/PreprocessedTextLexerInputOperation.java Wed May 28 13:50:31 2008 +0200 +++ 
b/lexer/src/org/netbeans/lib/lexer/PreprocessedTextLexerInputOperation.java Wed May 28 14:48:54 2008 +0200 @@ -64,37 +64,23 @@ private int lastRawLengthShift; private int tokenEndRawLengthShift; - - public PreprocessedTextLexerInputOperation(TokenList tokenList, PreprocessedTextStorage prepText) { - this(tokenList, 0, null, prepText, 0, 0, prepText.length()); - } + + private int tokenStartIndex; // Extra added to compile public PreprocessedTextLexerInputOperation(TokenList tokenList, int tokenIndex, Object lexerRestartState, PreprocessedTextStorage prepText, int prepTextStartOffset, int startOffset, int endOffset) { - super(tokenList, tokenIndex, lexerRestartState, prepText, - prepTextStartOffset, startOffset, endOffset); + super(tokenList, tokenIndex, lexerRestartState, startOffset, endOffset); this.preprocessedText = prepText; int index = startOffset - prepTextStartOffset; if (index > 0) { tokenStartRawLengthShift = preprocessedText.rawLengthShift(index); lastRawLengthShift = tokenStartRawLengthShift; } - preprocessingLevelCount++; // extra level of preprocessing - } - - public int deepRawLength(int length) { - return length + preprocessedText.rawLengthShift(tokenStartIndex() + length - 1) - - tokenStartRawLengthShift; - } - - public int deepRawLengthShift(int index) { - return preprocessedText.rawLengthShift(tokenStartIndex() + index) - - tokenStartRawLengthShift; } public int read(int index) { // index >= 0 is guaranteed by contract - index += tokenStartIndex(); + index += tokenStartIndex; if (index < readEndIndex()) { // Check whether the char is preprocessed int rls = preprocessedText.rawLengthShift(index); @@ -111,13 +97,12 @@ } } - public void tokenRecognized(int tokenLength) { - super.tokenRecognized(tokenLength); + public void assignTokenLength(int tokenLength) { tokenEndRawLengthShift = preprocessedText.rawLengthShift( - tokenStartIndex() + tokenLength() - 1); + tokenStartIndex + tokenLength - 1); } - public void tokenApproved() { + protected void tokenApproved() { // Increase base raw length shift by the token's last-char shift tokenStartRawLengthShift += tokenEndRawLengthShift; @@ -135,7 +120,6 @@ prepEndIndex -= tokenLength(); } } - super.tokenApproved(); } public void collectExtraPreprocessedChars(CharProvider.ExtraPreprocessedChars epc, @@ -153,7 +137,7 @@ // for the present token and the ending chars could possibly // be non-preprocessed (prepEndIndex > tokenLength) while (--i >= prepStartIndex && postCount > 0 - && preprocessedText.rawLengthShift(i + tokenStartIndex()) == tokenEndRawLengthShift + && preprocessedText.rawLengthShift(i + tokenStartIndex) == tokenEndRawLengthShift ) { // not preprocessed postCount--; } @@ -167,11 +151,11 @@ assert (preCount >= 0 && postCount >= 0); epc.ensureExtraLength(preCount + postCount); while (--preCount >= 0) { - epc.insert(readExisting(prepStartIndex - 1), deepRawLength(prepStartIndex) - prepStartIndex); +// epc.insert(readExisting(prepStartIndex - 1), deepRawLength(prepStartIndex) - prepStartIndex); prepStartIndex--; } while (--postCount >= 0) { - epc.append(readExisting(prepEndIndex), deepRawLength(prepEndIndex) - topPrepEndIndex); +// epc.append(readExisting(prepEndIndex), deepRawLength(prepEndIndex) - topPrepEndIndex); prepEndIndex++; topPrepEndIndex++; } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/SubSequenceTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/SubSequenceTokenList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/SubSequenceTokenList.java Wed May 28 14:48:54 2008 +0200 @@ 
-110,84 +110,30 @@ // No upper bound for end index so use tokenCount() (can be improved if desired) limitEndIndex = tokenList.tokenCount(); } else { // Valid limit end offset - limitEndIndex = tokenList.tokenCountCurrent(); // presently created token count - if (limitEndIndex == 0) { // no tokens yet -> attempt to create at least one - if (tokenList.tokenOrEmbeddingContainer(0) != null) { // some tokens exist - // Re-get the present token count (could be created a chunk of tokens at once) - limitEndIndex = tokenList.tokenCountCurrent(); + int[] indexAndTokenOffset = tokenList.tokenIndex(limitEndOffset); + limitEndIndex = indexAndTokenOffset[0]; + if (limitEndIndex != -1) { + // If the limitStartOffset is "inside" a token and it's not at or beyond end of TL + if (limitEndOffset != indexAndTokenOffset[1] && limitEndIndex < tokenList.tokenCountCurrent()) { + limitEndIndex++; // Include the token that contains the offset } - } - - if (limitEndIndex > 0) { - // tokenCount surely >0 - int tokenOffset = tokenList.tokenOffset(limitEndIndex - 1); - if (limitEndOffset > tokenOffset) { // may need to create further tokens if they do not exist - // Force token list to create subsequent tokens - // Cannot subtract offset by each token's length because - // there may be gaps between tokens due to token id filter use. - AbstractToken token = token(limitEndIndex - 1); - int tokenLength = token.length(); - while (limitEndOffset > tokenOffset + tokenLength) { // above present token - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(limitEndIndex); - if (tokenOrEmbeddingContainer != null) { - token = LexerUtilsConstants.token(tokenOrEmbeddingContainer); - if (tokenList.isContinuous() || token.isFlyweight()) { - tokenOffset += tokenLength; - } else { // retrieve offset - tokenOffset = tokenList.tokenOffset(limitEndIndex); - } - tokenLength = token.length(); - limitEndIndex++; - } else { // no more tokens => break - break; - } - } - - } else { // end index within existing tokens - // The offset is within the currently recognized tokens - // Use binary search - int low = 0; - limitEndIndex--; - - while (low <= limitEndIndex) { - int mid = (low + limitEndIndex) / 2; - int midStartOffset = tokenList.tokenOffset(mid); - - if (midStartOffset < limitEndOffset) { - low = mid + 1; - } else if (midStartOffset > limitEndOffset) { - limitEndIndex = mid - 1; - } else { // Token starting exactly at offset found - limitEndIndex = mid - 1; - break; - } - } - limitEndIndex++; // Increase from 'high' to end index - } + } else { // No tokens at all + limitEndIndex = 0; } } // Compute limitStartIndex (currently == 0) if (limitEndIndex > 0 && limitStartOffset > 0) { - int high = limitEndIndex - 1; - while (limitStartIndex <= high) { - int mid = (limitStartIndex + high) / 2; - int midStartOffset = tokenList.tokenOffset(mid); - - if (midStartOffset < limitStartOffset) { - limitStartIndex = mid + 1; - } else if (midStartOffset > limitStartOffset) { - high = mid - 1; - } else { // Token starting exactly at offset found - limitStartIndex = mid + 1; - break; - } - } - // Include previous token if it "includes" limitStartOffset (also handles gaps between tokens properly) - if (limitStartIndex > 0 && - tokenList.tokenOffset(limitStartIndex - 1) + token(limitStartIndex - 1).length() > limitStartOffset + // Although the binary search could only be in <0,limitEndIndex> bounds + // use regular TL.tokenIndex() because it has substantially better performance + // e.g. in JoinTokenList. 
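The statement that follows applies that tokenIndex() contract to clamp the start boundary; for reference, a self-contained sketch of the whole limit-index computation (hypothetical names; arrays of start offsets and lengths stand in for tokens, and a linear scan stands in for TokenList.tokenIndex()):

    /** Sketch of SubSequenceTokenList's limit-index clamping (illustrative). */
    final class SubRangeDemo {

        /** {index, startOffset} of the last token starting at or below offset. */
        static int[] tokenIndex(int[] starts, int offset) {
            int i = starts.length - 1;
            while (i > 0 && starts[i] > offset) {
                i--;
            }
            return new int[] { i, starts[i] };
        }

        /** {limitStartIndex, limitEndIndex} of tokens overlapping [startOffset, endOffset). */
        static int[] limitIndexes(int[] starts, int[] lengths, int startOffset, int endOffset) {
            if (starts.length == 0) {
                return new int[] { 0, 0 }; // no tokens at all
            }
            int[] end = tokenIndex(starts, endOffset);
            int limitEnd = end[0];
            if (endOffset != end[1]) {
                limitEnd++; // include the token that contains endOffset
            }
            int limitStart = 0;
            if (limitEnd > 0 && startOffset > 0) {
                int[] start = tokenIndex(starts, startOffset);
                limitStart = start[0];
                // startOffset may fall into the gap behind that token: skip it then
                if (start[1] + lengths[limitStart] <= startOffset) {
                    limitStart++;
                }
            }
            return new int[] { limitStart, limitEnd };
        }

        public static void main(String[] args) {
            int[] starts = { 0, 5, 12 };  // token 1 ends at 8, so [8,12) is a gap
            int[] lengths = { 5, 3, 4 };
            // Range [6,13) overlaps tokens 1 and 2 -> {1, 3}
            System.out.println(java.util.Arrays.toString(limitIndexes(starts, lengths, 6, 13)));
            // Range [9,12) lies entirely in the gap before token 2 -> empty: {2, 2}
            System.out.println(java.util.Arrays.toString(limitIndexes(starts, lengths, 9, 12)));
        }
    }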
+ int[] indexAndTokenOffset = tokenList.tokenIndex(limitStartOffset); + limitStartIndex = indexAndTokenOffset[0]; + // Check if the limitStartOffset is not in gap after end of token at limitStartIndex + if (limitStartIndex < tokenList.tokenCountCurrent() && + indexAndTokenOffset[1] + tokenList.tokenOrEmbedding(limitStartIndex).token().length() <= limitStartOffset ) { - limitStartIndex--; + limitStartIndex++; } } } @@ -204,22 +150,26 @@ return limitEndOffset; } - public Object tokenOrEmbeddingContainer(int index) { + public TokenOrEmbedding tokenOrEmbedding(int index) { index += limitStartIndex; return (index < limitEndIndex) - ? tokenList.tokenOrEmbeddingContainer(index) + ? tokenList.tokenOrEmbedding(index) : null; } - public int tokenOffset(int index) { + public int tokenOffsetByIndex(int index) { index += limitStartIndex; if (index >= limitEndIndex) throw new IndexOutOfBoundsException("index=" + index + " >= limitEndIndex=" + limitEndIndex); - return tokenList.tokenOffset(index); + return tokenList.tokenOffsetByIndex(index); + } + + public int[] tokenIndex(int offset) { + return LexerUtilsConstants.tokenIndexBinSearch(this, offset, tokenCountCurrent()); } public int tokenCount() { - return limitEndIndex - limitStartIndex; + return tokenCountCurrent(); } public int tokenCountCurrent() { @@ -238,20 +188,20 @@ return tokenList.languagePath(); } - public int childTokenOffset(int rawOffset) { - throw new IllegalStateException("Unexpected call."); - } - - public char childTokenCharAt(int rawOffset, int index) { - throw new IllegalStateException("Unexpected call."); + public int tokenOffset(AbstractToken token) { + return tokenList.tokenOffset(token); } public void wrapToken(int index, EmbeddingContainer embeddingContainer) { tokenList.wrapToken(limitStartIndex + index, embeddingContainer); } - public TokenList root() { - return tokenList.root(); + public TokenList rootTokenList() { + return tokenList.rootTokenList(); + } + + public CharSequence inputSourceText() { + return rootTokenList().inputSourceText(); } public TokenHierarchyOperation tokenHierarchyOperation() { @@ -281,14 +231,14 @@ public int startOffset() { if (tokenCountCurrent() > 0 || tokenCount() > 0) - return tokenOffset(0); + return tokenOffsetByIndex(0); return limitStartOffset; } public int endOffset() { int cntM1 = tokenCount() - 1; if (cntM1 >= 0) - return tokenOffset(cntM1) + token(cntM1).length(); + return tokenOffsetByIndex(cntM1) + tokenList.tokenOrEmbedding(cntM1).token().length(); return limitStartOffset; } @@ -296,8 +246,4 @@ return tokenList.isRemoved(); } - private AbstractToken token(int index) { - return LexerUtilsConstants.token(tokenList, index); - } - } \ No newline at end of file diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TextLexerInputOperation.java --- a/lexer/src/org/netbeans/lib/lexer/TextLexerInputOperation.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/TextLexerInputOperation.java Wed May 28 14:48:54 2008 +0200 @@ -51,76 +51,58 @@ * @author Miloslav Metelka * @version 1.00 */ - public class TextLexerInputOperation extends LexerInputOperation { - + /** * Input text from which the reading of characters is done. */ - private final CharSequence inputText; + private final CharSequence inputSourceText; - private final int inputTextStartOffset; - /** - * End of valid chars in readCharArray (points to first invalid char). + * Point beyond which the reading cannot go. 
*/ - private int readEndIndex; - + private int readEndOffset; - public TextLexerInputOperation(TokenList tokenList, CharSequence inputText) { - this(tokenList, 0, null, inputText, 0, 0, inputText.length()); + + public TextLexerInputOperation(TokenList tokenList) { + this(tokenList, 0, null, 0, -1); } public TextLexerInputOperation(TokenList tokenList, int tokenIndex, - Object lexerRestartState, CharSequence inputText, int inputTextStartOffset, - int startOffset, int endOffset) { + Object lexerRestartState, int startOffset, int endOffset) { super(tokenList, tokenIndex, lexerRestartState); - this.inputText = inputText; - this.inputTextStartOffset = inputTextStartOffset; - - // Make the offsets relative to the input start offset - startOffset -= inputTextStartOffset; - endOffset -= inputTextStartOffset; + this.inputSourceText = tokenList.inputSourceText(); + if (endOffset == -1) { + endOffset = inputSourceText.length(); + } assert (0 <= startOffset) && (startOffset <= endOffset) - && (endOffset <= inputText.length()) + && (endOffset <= inputSourceText.length()) : "startOffset=" + startOffset + ", endOffset=" + endOffset - + ", inputText.length()=" + inputText.length(); - setTokenStartIndex(startOffset); - readEndIndex = endOffset; + + ", inputSourceText.length()=" + inputSourceText.length(); + tokenStartOffset = startOffset; + readOffset = tokenStartOffset; + readEndOffset = endOffset; } - public int read(int index) { // index >= 0 is guaranteed by contract - index += tokenStartIndex(); - if (index < readEndIndex) { - return inputText.charAt(index); + public int read(int offset) { + if (offset < readEndOffset) { + return inputSourceText.charAt(offset); } else { // must read next or return EOF return LexerInput.EOF; } } - public char readExisting(int index) { - return inputText.charAt(tokenStartIndex() + index); + public char readExisting(int offset) { + return inputSourceText.charAt(offset); } - public void approveToken(AbstractToken token) { - if (isSkipToken(token)) { - preventFlyToken(); - - } else if (token.isFlyweight()) { - assert isFlyTokenAllowed(); - flyTokenAdded(); - - } else { // non-flyweight token - token.setTokenList(tokenList()); - token.setRawOffset(inputTextStartOffset + tokenStartIndex()); - clearFlySequence(); - } - - tokenApproved(); + protected void fillTokenData(AbstractToken token) { + token.setTokenList(tokenList); + token.setRawOffset(tokenStartOffset); } - + protected final int readEndIndex() { - return readEndIndex; + return readEndOffset; } } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TokenHierarchyOperation.java --- a/lexer/src/org/netbeans/lib/lexer/TokenHierarchyOperation.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/TokenHierarchyOperation.java Wed May 28 14:48:54 2008 +0200 @@ -41,6 +41,7 @@ package org.netbeans.lib.lexer; +import java.io.IOException; import java.io.Reader; import java.util.Collections; import java.util.HashMap; @@ -55,8 +56,6 @@ import org.netbeans.api.lexer.TokenHierarchyEvent; import org.netbeans.api.lexer.TokenHierarchyListener; import org.netbeans.api.lexer.TokenHierarchy; -import org.netbeans.lib.lexer.batch.CopyTextTokenList; -import org.netbeans.lib.lexer.batch.TextTokenList; import org.netbeans.lib.lexer.inc.IncTokenList; import org.netbeans.lib.lexer.inc.TokenHierarchyEventInfo; import org.netbeans.spi.lexer.MutableTextInput; @@ -66,8 +65,8 @@ import org.netbeans.api.lexer.TokenId; import org.netbeans.api.lexer.TokenSequence; import org.netbeans.lib.editor.util.ArrayUtilities; +import 
org.netbeans.lib.lexer.inc.TokenHierarchyUpdate; import org.netbeans.lib.lexer.inc.TokenListChange; -import org.netbeans.lib.lexer.token.AbstractToken; /** * Token hierarchy operation services tasks of its associated token hierarchy. @@ -79,8 +78,9 @@ */ public final class TokenHierarchyOperation { // "I" stands for input - - static final Logger LOG = TokenHierarchyUpdate.LOG; + + // -J-Dorg.netbeans.lib.lexer.TokenHierarchyOperation.level=FINE + static final Logger LOG = Logger.getLogger(TokenHierarchyOperation.class.getName()); // -J-Dorg.netbeans.spi.lexer.MutableTextInput.level=FINE private static final Logger LOG_LOCK = Logger.getLogger(MutableTextInput.class.getName()); // Logger for read/write-lock @@ -101,8 +101,12 @@ * Mutable text input for mutable token hierarchy or null otherwise. */ private MutableTextInput mutableTextInput; - - private TokenList rootTokenList; + + /** + * Root token list of this hierarchy. It is created in constructor and never changed + * during the whole lifetime of the token hierarchy. + */ + private final TokenList rootTokenList; /** * The hierarchy can be made inactive to release the tokens @@ -141,7 +145,7 @@ * If a token list list is contained then all its parents * with the shorter language path are also mandatorily maintained. */ - private Map path2tokenListList; + private Map> path2tokenListList; private int maxTokenListListPathSize; @@ -159,7 +163,37 @@ @SuppressWarnings("unchecked") I input = (I)inputReader; this.inputSource = input; - this.rootTokenList = new CopyTextTokenList(this, inputReader, + + // Instead of using an original CopyTextTokenList that allowed to skip + // individual characters of all flyweight tokens do just a copy of all chars + // from the Reader. TBD - do a lazy reading instead of pre-reading. + char[] chars = new char[LexerUtilsConstants.READER_TEXT_BUFFER_SIZE]; + int offset = 0; + try { + while (true) { + int readLen = inputReader.read(chars, offset, chars.length - offset); + if (readLen == -1) // End of stream + break; + offset += readLen; + if (offset == chars.length) { // Full buffer + chars = ArrayUtilities.charArray(chars); // Double array size + } + } + } catch (IOException e) { + // Ignored silently - there should be a wrapping reader catching and properly handling + // this IOException. + } finally { + // Attempt to close the Reader + try { + inputReader.close(); + } catch (IOException e) { + // Ignored silently - there should be a wrapping reader catching and properly handling + // this IOException. + } + } + String inputText = new String(chars, 0, offset); // Copy of reader's whole text + + this.rootTokenList = new BatchTokenList(this, inputText, language, skipTokenIds, inputAttributes); init(); activity = Activity.ACTIVE; @@ -178,10 +212,13 @@ @SuppressWarnings("unchecked") I input = (I)inputText; this.inputSource = input; - this.rootTokenList = copyInputText - ? new CopyTextTokenList(this, inputText, - language, skipTokenIds, inputAttributes) - : new TextTokenList(this, inputText, + if (copyInputText) { + // Instead of using an original CopyTextTokenList (that allowed to skip + // individual characters of all flyweight tokens) do just a copy of the full text + // and use regular BatchTokenList. 
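The eager draining of the Reader in the constructor above is the classic grow-and-copy read loop. A standalone JDK-only equivalent (the 4096 starting size is an assumption standing in for LexerUtilsConstants.READER_TEXT_BUFFER_SIZE, whose value this patch does not show):

    import java.io.IOException;
    import java.io.Reader;
    import java.util.Arrays;

    static String drain(Reader r) throws IOException {
        char[] chars = new char[4096];
        int offset = 0;
        int readLen;
        while ((readLen = r.read(chars, offset, chars.length - offset)) != -1) {
            offset += readLen;
            if (offset == chars.length)                             // buffer full
                chars = Arrays.copyOf(chars, 2 * chars.length);     // double, like ArrayUtilities.charArray
        }
        return new String(chars, 0, offset);
    }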
+ inputText = inputText.toString(); + } + this.rootTokenList = new BatchTokenList(this, inputText, language, skipTokenIds, inputAttributes); init(); activity = Activity.ACTIVE; @@ -260,10 +297,8 @@ return; } } else { // Wishing to be inactive - change = new TokenListChange(incTokenList); -// change.setIndex(0); -// change.setOffset(0); - incTokenList.replaceTokens(change, incTokenList.tokenCountCurrent(), 0); + change = TokenListChange.createRebuildChange(incTokenList); + incTokenList.replaceTokens(change, 0); incTokenList.setLanguagePath(null); incTokenList.reinit(); } @@ -354,7 +389,7 @@ ensureReadLocked(); synchronized (rootTokenList) { return isActiveImpl() - ? new TokenSequenceList(this, languagePath, startOffset, endOffset) + ? new TokenSequenceList(rootTokenList, languagePath, startOffset, endOffset) : null; } } @@ -364,11 +399,12 @@ *
* If the list needs to be created or it was non-mandatory. */ - public TokenListList tokenListList(LanguagePath languagePath) { + public TokenListList tokenListList(LanguagePath languagePath) { assert isActiveNoInit() : "Token hierarchy expected to be active."; - TokenListList tll = path2tokenListList().get(languagePath); + @SuppressWarnings("unchecked") + TokenListList tll = (TokenListList) path2tokenListList().get(languagePath); if (tll == null) { - tll = new TokenListList(this, languagePath); + tll = new TokenListList(rootTokenList, languagePath); path2tokenListList.put(languagePath, tll); maxTokenListListPathSize = Math.max(languagePath.size(), maxTokenListListPathSize); // Also create parent token list lists if they don't exist yet @@ -379,23 +415,27 @@ return tll; } - private Map path2tokenListList() { + /** + * Get existing token list list or null if the TLL does not exist yet. + */ + public TokenListList existingTokenListList(LanguagePath languagePath) { + synchronized (rootTokenList()) { + @SuppressWarnings("unchecked") + TokenListList tll = (path2tokenListList != null) + ? (TokenListList) path2tokenListList.get(languagePath) + : null; + return tll; + } + } + + private Map> path2tokenListList() { if (path2tokenListList == null) { - path2tokenListList = new HashMap(4, 0.5f); + path2tokenListList = new HashMap>(4, 0.5f); } return path2tokenListList; } - /** - * Get existing token list list or null if the TLL does not exist yet. - */ - public TokenListList existingTokenListList(LanguagePath languagePath) { - synchronized (rootTokenList()) { - return (path2tokenListList != null) ? path2tokenListList.get(languagePath) : null; - } - } - - int maxTokenListListPathSize() { + public int maxTokenListListPathSize() { return maxTokenListListPathSize; } @@ -405,15 +445,11 @@ if (isActiveNoInit()) { IncTokenList incTokenList = (IncTokenList)rootTokenList; incTokenList.incrementModCount(); - TokenListChange change = new TokenListChange(incTokenList); CharSequence text = LexerSpiPackageAccessor.get().text(mutableTextInput); TokenHierarchyEventInfo eventInfo = new TokenHierarchyEventInfo( this, TokenHierarchyEventType.REBUILD, 0, 0, "", 0); - change.setIndex(0); - change.setOffset(0); - change.setAddedEndOffset(0); // Tokens will be recreated lazily - - incTokenList.replaceTokens(change, incTokenList.tokenCountCurrent(), 0); + TokenListChange change = TokenListChange.createRebuildChange(incTokenList); + incTokenList.replaceTokens(change, 0); incTokenList.reinit(); // Will relex tokens lazily eventInfo.setTokenChangeInfo(change.tokenChangeInfo()); @@ -453,55 +489,7 @@ TokenHierarchyEventInfo eventInfo = new TokenHierarchyEventInfo( this, TokenHierarchyEventType.MODIFICATION, offset, removedLength, removedText, insertedLength); - // First a top-level token list will be updated then the embedded ones. 
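The tokenListList()/existingTokenListList() pair above forms a get-or-create split: existingTokenListList() is a lookup synchronized on the root token list that never instantiates anything, while tokenListList() creates the missing list and, transitively, its parents. A compressed sketch of that pattern with illustrative names (the real code also tracks maxTokenListListPathSize):

    private Map<LanguagePath, TokenListList<?>> path2tll;        // created lazily

    TokenListList<?> getOrCreate(LanguagePath path) {
        if (path2tll == null)
            path2tll = new HashMap<LanguagePath, TokenListList<?>>(4, 0.5f);
        TokenListList<?> tll = path2tll.get(path);
        if (tll == null) {
            tll = new TokenListList(rootTokenList, path);        // unchecked, as in the patch
            path2tll.put(path, tll);
            if (path.size() > 2)
                getOrCreate(path.parent());                      // parents are maintained too
        }
        return tll;
    }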
- IncTokenList incTokenList = (IncTokenList)rootTokenList; - - if (LOG.isLoggable(Level.FINEST)) { - // Display current state of the hierarchy by faking its text - // through original text - CharSequence text = incTokenList.text(); - assert (text != null); - incTokenList.setText(eventInfo.originalText()); - // Dump all contents - LOG.finest(toString()); - // Return the original text - incTokenList.setText(text); - } - - if (LOG.isLoggable(Level.FINE)) { - StringBuilder sb = new StringBuilder(150); - sb.append("<<<<<<<<<<<<<<<<<< LEXER CHANGE START ------------------\n"); // NOI18N - sb.append(eventInfo.modificationDescription(false)); - TokenHierarchyUpdate.LOG.fine(sb.toString()); - } - - new TokenHierarchyUpdate(eventInfo).update(incTokenList); - - if (LOG.isLoggable(Level.FINE)) { - LOG.fine("AFFECTED: " + eventInfo.dumpAffected() + "\n"); // NOI18N - String extraMsg = ""; - if (LOG.isLoggable(Level.FINER)) { - // Check consistency of the whole token hierarchy - String error = checkConsistency(); - if (error != null) { - String msg = "!!!CONSISTENCY-ERROR!!!: " + error + "\n"; - if (LOG.isLoggable(Level.FINEST)) { - throw new IllegalStateException(msg); - } else { - LOG.finer(msg); - } - } else { - extraMsg = "(TokenHierarchy Check OK) "; - } - } - LOG.fine(">>>>>>>>>>>>>>>>>> LEXER CHANGE END " + extraMsg + "------------------\n"); // NOI18N - } - - if (LOG.isLoggable(Level.FINEST)) { - LOG.finest("AFTER UPDATE:\n"); - LOG.finest(toString()); - } - + new TokenHierarchyUpdate(eventInfo).update(); fireTokenHierarchyChanged(eventInfo); } } @@ -553,7 +541,7 @@ // // private void checkIsSnapshot() { // if (!isSnapshot()) { -// throw new IllegalStateException("Not a snapshot"); +// throw new IllegalStateException("Not a snapshot"); // } // } // @@ -681,17 +669,17 @@ */ public String checkConsistency() { // Check root token list first - String error = checkConsistencyTokenList(rootTokenList(), ArrayUtilities.emptyIntArray(), 0); + String error = LexerUtilsConstants.checkConsistencyTokenList(rootTokenList(), true); // Check token-list lists if (error == null && path2tokenListList != null) { - for (TokenListList tll : path2tokenListList.values()) { + for (TokenListList tll : path2tokenListList.values()) { // Check token-list list consistency error = tll.checkConsistency(); if (error != null) return error; // Check each individual token list in token-list list for (TokenList tl : tll) { - error = checkConsistencyTokenList(tl, ArrayUtilities.emptyIntArray(), tl.startOffset()); + error = LexerUtilsConstants.checkConsistencyTokenList(tl, false); if (error != null) { return error; } @@ -699,120 +687,6 @@ } } return error; - } - - private String checkConsistencyTokenList(TokenList tokenList, - int[] parentIndexes, int firstTokenOffset) { - int tokenCountCurrent = tokenList.tokenCountCurrent(); - int[] indexes = ArrayUtilities.intArray(parentIndexes, parentIndexes.length + 1); - boolean continuous = tokenList.isContinuous(); - int lastOffset = firstTokenOffset; - for (int i = 0; i < tokenCountCurrent; i++) { - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(i); - if (tokenOrEmbeddingContainer == null) { - return dumpContext("Null token", tokenList, i, parentIndexes); // NOI18N - } - AbstractToken token = LexerUtilsConstants.token(tokenOrEmbeddingContainer); - // Check whether tokenList.startOffset() corresponds to the start of first token - if (i == 0 && continuous && tokenCountCurrent > 0 && !token.isFlyweight()) { - if (token.offset(null) != tokenList.startOffset()) { - return 
dumpContext("firstToken.offset()=" + token.offset(null) + - " != tokenList.startOffset()=" + tokenList.startOffset(), - tokenList, i, parentIndexes); - } - } - if (!token.isFlyweight() && token.tokenList() != tokenList) { - return dumpContext("Invalid token.tokenList()=" + token.tokenList(), - tokenList, i, parentIndexes); - } - if (token.text() == null) { - return dumpContext("Null token.text()=" + token.tokenList(), - tokenList, i, parentIndexes); - } - int offset = (token.isFlyweight()) ? lastOffset : token.offset(null); - if (offset < 0) { - return dumpContext("Token offset=" + offset + " < 0", tokenList, i, parentIndexes); // NOI18N - } - if (offset < lastOffset) { - return dumpContext("Token offset=" + offset + " < lastOffset=" + lastOffset, - tokenList, i, parentIndexes); - } - if (offset > lastOffset && continuous) { - return dumpContext("Gap between tokens; offset=" + offset + ", lastOffset=" + lastOffset, - tokenList, i, parentIndexes); - } - lastOffset = offset + token.length(); - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - EmbeddingContainer ec = (EmbeddingContainer)tokenOrEmbeddingContainer; - EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); - while (etl != null) { - String error = checkConsistencyTokenList(etl, indexes, offset + etl.embedding().startSkipLength()); - if (error != null) - return error; - etl = etl.nextEmbeddedTokenList(); - } - } - } - return null; - } - - private String dumpContext(String msg, TokenList tokenList, int index, int[] parentIndexes) { - StringBuilder sb = new StringBuilder(); - sb.append(msg); - sb.append(" at index="); // NOI18N - sb.append(index); - sb.append(" of tokens of language "); // NOI18N - sb.append(tokenList.languagePath().innerLanguage().mimeType()); - sb.append('\n'); - LexerUtilsConstants.appendTokenList(sb, tokenList, index, index - 2, index + 3, false, 0); - sb.append("\nParents:\n"); // NOI18N - sb.append(tracePath(parentIndexes, tokenList.languagePath())); - return sb.toString(); - } - - public String findTokenContext(AbstractToken token) { - return findTokenContext(token, rootTokenList(), ArrayUtilities.emptyIntArray()); - } - - private String findTokenContext(AbstractToken token, TokenList tokenList, int[] parentIndexes) { - int tokenCountCurrent = tokenList.tokenCountCurrent(); - int[] indexes = ArrayUtilities.intArray(parentIndexes, parentIndexes.length + 1); - for (int i = 0; i < tokenCountCurrent; i++) { - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainer(i); - if (tokenOrEmbeddingContainer == null) { - continue; - } - if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - EmbeddingContainer ec = (EmbeddingContainer)tokenOrEmbeddingContainer; - if (ec.token() == token) { - return dumpContext("Token found.", tokenList, i, indexes); - } - EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); - while (etl != null) { - String context = findTokenContext(token, etl, indexes); - if (context != null) - return context; - etl = etl.nextEmbeddedTokenList(); - } - - } else if (tokenOrEmbeddingContainer == token) { - return dumpContext("Token found.", tokenList, i, indexes); - } - } - return null; - } - - private String tracePath(int[] indexes, LanguagePath languagePath) { - StringBuilder sb = new StringBuilder(); - TokenList tokenList = rootTokenList(); - for (int i = 0; i < indexes.length; i++) { - LexerUtilsConstants.appendTokenInfo(sb, tokenList, i, - tokenHierarchy(), false, 0); - // Assign language to variable to get rid of javac bug for incremental 
compilation on 1.5 - Language language = languagePath.language(i); - tokenList = EmbeddingContainer.embeddedTokenList(tokenList, indexes[i], language); - } - return sb.toString(); } // private final class SnapshotRef extends WeakReference> implements Runnable { diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TokenHierarchyUpdate.java --- a/lexer/src/org/netbeans/lib/lexer/TokenHierarchyUpdate.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,628 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.inc.IncTokenList; -import org.netbeans.lib.lexer.inc.MutableTokenList; -import org.netbeans.lib.lexer.inc.TokenHierarchyEventInfo; -import org.netbeans.lib.lexer.inc.TokenListChange; -import org.netbeans.lib.lexer.inc.TokenListUpdater; -import static org.netbeans.lib.lexer.LexerUtilsConstants.INVALID_STATE; - -/** - * Request for updating of token hierarchy after text modification - * or custom embedding creation/removal. - *
- * This class contains all the data and methods related to updating. - * - * @author Miloslav Metelka - */ - -public final class TokenHierarchyUpdate { - - // -J-Dorg.netbeans.lib.lexer.TokenHierarchyUpdate.level=FINE - static final Logger LOG = Logger.getLogger(TokenHierarchyUpdate.class.getName()); - - final TokenHierarchyEventInfo eventInfo; - - private Map path2info; - - /** - * Infos ordered from higher top levels of the hierarchy to lower levels. - * Useful for top-down updating at the end. - */ - private List> levelInfos; - - TokenListChange rootChange; - - public TokenHierarchyUpdate(TokenHierarchyEventInfo eventInfo) { - this.eventInfo = eventInfo; - } - - public void update(IncTokenList incTokenList) { - incTokenList.incrementModCount(); - // Update top-level token list first - // It does not need any updateStatusImpl() since it's only for embedded token lists - rootChange = updateTokenListByModification(incTokenList, null); - eventInfo.setTokenChangeInfo(rootChange.tokenChangeInfo()); - - if (LOG.isLoggable(Level.FINE)) { - LOG.fine("ROOT CHANGE: " + rootChange.toString(0) + "\n"); // NOI18N - } - - // If there is an active lexer input operation (for not-yet finished - // top-level token list lexing) refresh it because it would be damaged - // by the performed token list update - if (!incTokenList.isFullyLexed()) { - incTokenList.refreshLexerInputOperation(); - } - - // Now the update goes to possibly embedded token list lists - // based on the top-level change. If there are embeddings that join sections - // this becomes a fairly complex thing. - // 1. The updating must always go from upper levels to lower levels of the token hierarchy - // to ensure that the tokens of the possible joined embeddings get updated properly - // as the tokens created/removed at upper levels may contain embeddings that will - // need to be added/removed from token list lists on lower level. - // 2. A single insert/remove may produce token updates at several - // places in the document. A top-level change of token with embedding - // will request the embedded token list update and that token list - // may be connected with another joined token list(s) with the same language path - // and the update may continue into these joined token lists. - - // 3. The algorithm must collect both removed and added token lists - // in the TLLInfo. - // 4. For a removed token list the updating must check nested embedded token lists - // because some embedded tokens of the removed embedded token list might contain - // another embedding that might also be maintained as token list list - // and need to be updated. - // 5. The parent token list lists - // are always maintained too which simplifies the updating algorithm - // and speeds it up because the token list list marks whether it has any children - // or not and so the deep traversing only occurs if there are any children present. - // 6. Additions may produce nested additions too so they need to be makred - // similarly to removals. 
- if (rootChange.isBoundsChange()) { - processBoundsChangeEmbeddings(rootChange, null); - } else { - // Mark changed area based on start of first mod.token and end of last mod.token - // of the root-level change - eventInfo.setMinAffectedStartOffset(rootChange.offset()); - eventInfo.setMaxAffectedEndOffset(rootChange.addedEndOffset()); - processNonBoundsChange(rootChange); - } - - processLevelInfos(); - } - - public void updateCreateEmbedding(EmbeddedTokenList addedTokenList) { - TLLInfo info = info(addedTokenList.languagePath()); - if (info != NO_INFO) { - if (LOG.isLoggable(Level.FINE)) { - LOG.fine("THU.updateCreateEmbedding(): " + addedTokenList.toStringHeader()); - } - info.markAdded(addedTokenList); - processLevelInfos(); - } - } - - /** - * update-status must be called by the caller. - * @param removedTokenList token list removed by TS.removeEmbedding(). - */ - public void updateRemoveEmbedding(EmbeddedTokenList removedTokenList) { - TLLInfo info = info(removedTokenList.languagePath()); - if (info != NO_INFO) { - if (LOG.isLoggable(Level.FINE)) { - LOG.fine("THU.updateRemoveEmbedding(): " + removedTokenList.toStringHeader()); - } - // update-status called by caller. - info.markRemoved(removedTokenList); - processLevelInfos(); - } - } - - void processBoundsChangeEmbeddings(TokenListChange change, TokenListChange parentChange) { - // Add an embedded change to the parent change (if exists) - if (parentChange != null) { - parentChange.tokenChangeInfo().addEmbeddedChange(change.tokenChangeInfo()); - } - Object tokenOrEC = change.tokenChangeInfo().removedTokenList().tokenOrEmbeddingContainer(0); - if (tokenOrEC.getClass() == EmbeddingContainer.class) { - TLLInfo info; - boolean hasChildren; - if (change.languagePath().size() > 1) { - info = info(change.languagePath()); - hasChildren = (info != NO_INFO) ? info.tokenListList().hasChildren() : false; - } else { // root-level - info = NO_INFO; - hasChildren = (eventInfo.tokenHierarchyOperation().maxTokenListListPathSize() > 0); - } - EmbeddingContainer ec = (EmbeddingContainer)tokenOrEC; - rewrapECToken(ec, change); // Includes updateStatusImpl() - EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); - if (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING) { - // Check the text length beyond modification => end skip length must not be affected - int modRelOffset = eventInfo.modificationOffset() - change.offset(); - int beyondModLength = change.addedEndOffset() - (eventInfo.modificationOffset() + eventInfo.diffLengthOrZero()); - EmbeddedTokenList prevEtl = null; - do { - TLLInfo childInfo = hasChildren ? 
info(etl.languagePath()) : NO_INFO; - // Check whether the change was not in the start or end skip lengths - // and if so then remove the embedding - if (modRelOffset >= etl.embedding().startSkipLength() - && beyondModLength >= etl.embedding().endSkipLength() - ) { // Modification within embedding's bounds => embedding can stay - // Mark that the embedding should be updated - if (childInfo != NO_INFO) { - // update-status called by rewrap-ec-token above - childInfo.markBoundsChange(etl); - } else { // No child but want to update nested possible bounds changes - if (etl.isInited()) { - parentChange = change; - // Perform change in child - it surely does not join the sections - // since otherwise the childInfo could not be null - // update-status done above for the embedding container - change = updateTokenListByModification(etl, null); - if (change.isBoundsChange()) { - processBoundsChangeEmbeddings(change, parentChange); - } else { - eventInfo.setMinAffectedStartOffset(change.offset()); - eventInfo.setMaxAffectedEndOffset(change.addedEndOffset()); - } - } - } - prevEtl = etl; - etl = etl.nextEmbeddedTokenList(); - - } else { // Mod in skip lengths => Remove the etl from chain - if (childInfo != NO_INFO) { - // update-status already done as part of rewrap-token - childInfo.markRemoved(etl); - } - // Remove embedding and get the next embedded token list (prevEtl stays the same) - etl = ec.removeEmbeddedTokenList(prevEtl, etl); - } - } while (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING); - } - } - } - - void processNonBoundsChange(TokenListChange change) { - TLLInfo info; - boolean hasChildren; - if (change.languagePath().size() >= 2) { - info = info(change.languagePath()); - hasChildren = (info != NO_INFO && info.tokenListList().hasChildren()); - } else { // root change - info = NO_INFO; - hasChildren = (eventInfo.tokenHierarchyOperation().maxTokenListListPathSize() > 0); - } - if (hasChildren) { - // First mark the removed embeddings - TokenList removedTokenList = change.tokenChangeInfo().removedTokenList(); - if (removedTokenList != null) { - markRemovedEmbeddings(removedTokenList); - } - - // Now mark added embeddings - TokenList currentTokenList = change.tokenChangeInfo().currentTokenList(); - markAddedEmbeddings(currentTokenList, change.index(), change.addedTokensOrBranchesCount()); - } - } - - /** - * Collect removed embeddings for the given token list recursively - * and nest deep enough for all maintained children - * token list lists. 
- */ - private void markRemovedEmbeddings(TokenList removedTokenList) { - int tokenCount = removedTokenList.tokenCountCurrent(); - for (int i = 0; i < tokenCount; i++) { - Object tokenOrEC = removedTokenList.tokenOrEmbeddingContainer(i); - if (tokenOrEC.getClass() == EmbeddingContainer.class) { - EmbeddingContainer ec = (EmbeddingContainer)tokenOrEC; - ec.updateStatusImpl(); // Update status since markRemoved() will need it - EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); - while (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING) { - TLLInfo info = info(etl.languagePath()); - if (info != NO_INFO) { - // update-status called above - info.markRemoved(etl); - } - etl = etl.nextEmbeddedTokenList(); - } - } - } - } - - private void markAddedEmbeddings(TokenList tokenList, int index, int addedCount) { - for (int i = 0; i < addedCount; i++) { - // Ensure that the default embedding gets possibly created - EmbeddedTokenList etl = EmbeddingContainer.embeddedTokenList(tokenList, index + i, null); - if (etl != null) { - TLLInfo info = info(etl.languagePath()); - if (info != NO_INFO) { - // Mark that there was a new embedded token list added - // There should be no updateStatusImpl() necessary since the token lists are new - // and the parent embedding container was surely updated by the updating process. - info.markAdded(etl); - } - } - } - } - - private void processLevelInfos() { - // Now relex the changes in affected token list lists - // i.e. fix the tokens after the token lists removals/additions. - // The higher-level updates - if (levelInfos != null) { - // The list can be extended by additional items dynamically during iteration - for (int i = 0; i < levelInfos.size(); i++) { - List infos = levelInfos.get(i); - // The "infos" list should not be extended by additional items dynamically during iteration - // However an extra items can be added at the deeper levels. - for (int j = 0; j < infos.size(); j++) { - infos.get(j).update(); - } - } - } - - // Assert that update was called on all infos - if (LOG.isLoggable(Level.FINER) && levelInfos != null) { - for (List infos : levelInfos) { - for (TLLInfo info : infos) { - if (!info.updateCalled) { - throw new IllegalStateException("Update not called on tokenListList\n" + // NOI18N - info.tokenListList); - } - } - } - } - } - - TokenListChange updateTokenListByModification( - MutableTokenList tokenList, Object zeroIndexRelexState) { - TokenListChange change = new TokenListChange(tokenList); -// if (tokenList instanceof EmbeddedTokenList) { -// ((EmbeddedTokenList)tokenList).embeddingContainer().checkStatusUpdated(); -// } - TokenListUpdater.update(tokenList, eventInfo.modificationOffset(), - eventInfo.insertedLength(), eventInfo.removedLength(), change, zeroIndexRelexState); - return change; - } - - /** - * Return tll info or NO_INFO if the token list list is not maintained - * for the given language path. 
- */ - private TLLInfo info(LanguagePath languagePath) { - if (path2info == null) { // Init since it will contain NO_INFO - path2info = new HashMap(4, 0.5f); - } - TLLInfo info = path2info.get(languagePath); - if (info == null) { - TokenListList tll = eventInfo.tokenHierarchyOperation().existingTokenListList(languagePath); - if (tll != null) { - info = new TLLInfo(this, tll); - int index = languagePath.size() - 2; - if (levelInfos == null) { - levelInfos = new ArrayList>(index + 1); - } - while (levelInfos.size() <= index) { - levelInfos.add(new ArrayList(2)); - } - levelInfos.get(index).add(info); - } else { // No token list list for the given language path - info = NO_INFO; - } - path2info.put(languagePath, info); - } - return info; - } - - private void rewrapECToken(EmbeddingContainer ec, TokenListChange change) { - @SuppressWarnings("unchecked") - TokenListChange tChange = (TokenListChange)change; - ec.reinit(tChange.addedToken(0)); - ec.updateStatusImpl(); - tChange.tokenList().wrapToken(tChange.index(), ec); - } - - /** - * Special constant value to avoid double map search for token list lists updating. - */ - static final TLLInfo NO_INFO = new TLLInfo(null, null); - - /** - * Information about update in a single token list list. - */ - static final class TLLInfo { - - final TokenHierarchyUpdate update; - - final TokenListList tokenListList; - - int index; - - int removeCount; - - List> added; - - TokenListChange change; - - boolean updateCalled; - - public TLLInfo(TokenHierarchyUpdate update, TokenListList tokenListList) { - this.update = update; - this.tokenListList = tokenListList; - this.index = -1; - this.added = Collections.emptyList(); - } - - public TokenListList tokenListList() { - return tokenListList; - } - - /** - * Mark the given token list as removed in this token list list. - * All removed token lists should be marked by their increasing offset - * so it should be necessary to search for the index just once. - *
- * It's expected that updateStatusImpl() was already called - * on the corresponding embedding container. - */ - public void markRemoved(EmbeddedTokenList removedTokenList) { - boolean indexWasMinusOne; // Used for possible exception cause debugging -// removedTokenList.embeddingContainer().checkStatusUpdated(); - if (index == -1) { - checkUpdateNotCalledYet(); - indexWasMinusOne = true; - index = tokenListList.findIndexDuringUpdate(removedTokenList, - update.eventInfo.modificationOffset(), update.eventInfo.removedLength()); - assert (index >= 0) : "index=" + index + " < 0"; // NOI18N - } else { // Index already initialized - indexWasMinusOne = false; - } - TokenList markedForRemoveTokenList = tokenListList.getOrNull(index + removeCount); - if (markedForRemoveTokenList != removedTokenList) { - int realIndex = tokenListList.indexOf(removedTokenList); - throw new IllegalStateException("Removing at index=" + index + // NOI18N - " but real index is " + realIndex + // NOI18N - " (indexWasMinusOne=" + indexWasMinusOne + ").\n" + // NOI18N - "Wishing to remove tokenList\n" + removedTokenList + // NOI18N - "\nbut marked-for-remove tokenList is \n" + markedForRemoveTokenList + // NOI18N - "\nfrom tokenListList\n" + tokenListList + // NOI18N - "\n\nModification description:\n" + update.eventInfo.modificationDescription(true) // NOI18N - ); - } - removeCount++; - } - - /** - * Mark the given token list to be added to this list of token lists. - * At the end first the token lists marked for removal will be removed - * and then the token lists marked for addition will be added. - *
- * It's expected that updateStatusImpl() was already called - * on the corresponding embedding container. - */ - public void markAdded(EmbeddedTokenList addedTokenList) { -// addedTokenList.embeddingContainer().checkStatusUpdated(); - if (added.size() == 0) { - checkUpdateNotCalledYet(); - if (index == -1) { - index = tokenListList.findIndex(addedTokenList.startOffset()); - assert (index >= 0) : "index=" + index + " < 0"; // NOI18N - } - added = new ArrayList>(4); - } - added.add(addedTokenList); - } - - /** - * Mark that a parent's token list's bounds change need to be propagated - * into the given (child) token list. - *
- * It's expected that updateStatusImpl() was already called - * on the corresponding embedding container. - */ - public void markBoundsChange(EmbeddedTokenList etl) { - assert (index == -1) : "index=" + index + " != -1"; // Should be the first one -// etl.embeddingContainer().checkStatusUpdated(); - checkUpdateNotCalledYet(); - index = tokenListList.findIndex(etl.startOffset()); - } - - public void update() { - checkUpdateNotCalledYet(); - updateCalled = true; - // Update this level (and language path). - // All the removed and added sections resulting from parent change(s) - // are already marked. - if (index == -1) - return; // Nothing to do - - if (removeCount == 0 && added.size() == 0) { // Bounds change only - if (LOG.isLoggable(Level.FINE)) { - LOG.fine("TLLInfo.update(): BOUNDS-CHANGE: " + tokenListList.languagePath().mimePath() + // NOI18N - " index=" + index + // NOI18N - '\n' - ); - } - - EmbeddedTokenList etl = tokenListList.get(index); - etl.embeddingContainer().updateStatusImpl(); - Object matchState = LexerUtilsConstants.endState(etl); - Object relexState = tokenListList.relexState(index); - // update-status called above - TokenListChange chng = update.updateTokenListByModification(etl, relexState); - relexState = LexerUtilsConstants.endState(etl, relexState); - // Prevent bounds change in case the states at the end of the section would not match - // which leads to relexing of the next section. - if (chng.isBoundsChange() && LexerUtilsConstants.statesEqual(relexState, matchState)) { - TokenListChange parentChange = (tokenListList.languagePath().size() == 2) - ? update.rootChange - : update.info(tokenListList.languagePath().parent()).change; - update.processBoundsChangeEmbeddings(chng, parentChange); - } else { // Regular change - update.processNonBoundsChange(chng); - } - relexAfterLastModifiedSection(index + 1, relexState, matchState); - - } else { // Non-bounds change - if (LOG.isLoggable(Level.FINE)) { - LOG.fine("TLLInfo.update(): REPLACE: " + tokenListList.languagePath().mimePath() + // NOI18N - " index=" + index + // NOI18N - ", removeCount=" + removeCount + // NOI18N - ", added.size()=" + added.size() + // NOI18N - '\n' - ); - } - - TokenList[] removed = tokenListList.replace(index, removeCount, added); - // Mark embeddings of removed token lists as removed - if (tokenListList.hasChildren()) { - for (int i = 0; i < removed.length; i++) { - TokenList removedTokenList = removed[i]; - update.markRemovedEmbeddings(removedTokenList); - } - } - - Object relexState; // State from which the relexing will start - Object matchState = INVALID_STATE; // State that needs to be reached by relexing - if (tokenListList.joinSections()) { // Need to find the right relexState - // Must update the token list by incremental algorithm - // Find non-empty token list and take last token's state - relexState = tokenListList.relexState(index); - for (int i = removed.length - 1; i >= 0 && matchState == INVALID_STATE; i--) { - matchState = LexerUtilsConstants.endState((EmbeddedTokenList)removed[i]); - } - // Find the start state as the previous non-empty section's last token's state - // for case there would be no token lists added or all the added sections - // would be empty. 
- if (matchState == INVALID_STATE) // None or just empty sections were removed - matchState = relexState; - - } else { // Not joining the sections - relexState = null; - } - - // Relex all the added token lists (just by asking for tokenCount - init() will be done) - for (int i = 0; i < added.size(); i++) { - EmbeddedTokenList tokenList = (EmbeddedTokenList)added.get(i); - assert (!tokenList.isInited()); - tokenList.init(relexState); - if (tokenList.embedding().joinSections()) { - tokenListList.setJoinSections(true); - } - relexState = LexerUtilsConstants.endState((EmbeddedTokenList)tokenList, relexState); - if (tokenListList.hasChildren()) { - update.markAddedEmbeddings(tokenList, 0, tokenList.tokenCount()); - } - // Added token lists should not require updateStatus() - update.eventInfo.setMaxAffectedEndOffset(tokenList.endOffset()); - } - - if (tokenListList.joinSections()) { - index += added.size(); - relexAfterLastModifiedSection(index, relexState, matchState); - } - } - -// for (EmbeddedTokenList etl : tokenListList) { -// etl.embeddingContainer().updateStatusImpl(); -// if (etl.embeddingContainer().isRemoved()) -// throw new IllegalStateException(); -// } - // Set index to -1 to simplify correctness checking - index = -1; - } - - void checkUpdateNotCalledYet() { - if (updateCalled) { - throw new IllegalStateException("Update already called on \n" + tokenListList); - } - } - - private void relexAfterLastModifiedSection(int index, Object relexState, Object matchState) { - // Must continue relexing existing section(s) (from a different start state) - // until the relexing will stop before the last token of the given section. - EmbeddedTokenList etl; - while (!LexerUtilsConstants.statesEqual(relexState, matchState) - && (etl = tokenListList.getOrNull(index)) != null - ) { - etl.embeddingContainer().updateStatusImpl(); - if (etl.tokenCount() > 0) { - // Remember state after the last token of the given section - matchState = etl.state(etl.tokenCount() - 1); - // updateStatusImpl() just called - TokenListChange chng = updateTokenListAtStart(etl, etl.startOffset(), relexState); - update.processNonBoundsChange(chng); - // Since the section is non-empty (checked above) there should be >0 tokens - relexState = etl.state(etl.tokenCount() - 1); - } - index++; - } - } - - private TokenListChange updateTokenListAtStart( - EmbeddedTokenList etl, int offset, Object zeroIndexRelexState) { - TokenListChange chng = new TokenListChange(etl); -// etl.embeddingContainer().checkStatusUpdated(); - TokenListUpdater.update(etl, offset, 0, 0, chng, zeroIndexRelexState); - update.eventInfo.setMaxAffectedEndOffset(chng.addedEndOffset()); - return chng; - } - - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TokenList.java --- a/lexer/src/org/netbeans/lib/lexer/TokenList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/TokenList.java Wed May 28 14:48:55 2008 +0200 @@ -92,7 +92,7 @@ * @param >=0 index of the token in this list. * @return valid token or null if the index is too high. */ - Object tokenOrEmbeddingContainer(int index); + TokenOrEmbedding tokenOrEmbedding(int index); /** * Replace flyweight token at the given index with its non-flyweight copy. @@ -120,11 +120,41 @@ * Get absolute offset of the token at the given index in the token list. *
* This method can only be called if the token at the given index - * was already fetched by {@link tokenOrEmbeddingContainer(int)}. + * was already fetched by {@link tokenOrEmbedding(int)}. + *
+ * For EmbeddedTokenList a EmbeddingContainer.updateStatus() must be called + * prior to calling this method to obtain up-to-date results. */ - int tokenOffset(int index); + int tokenOffsetByIndex(int index); /** + * Get absolute offset of a token contained in this token list. *
+ * For EmbeddedTokenList a EmbeddingContainer.updateStatus() must be called + * prior to calling this method to obtain up-to-date results. + * + * @param token non-null child token of this token list. + * @return absolute offset in the input. + */ + int tokenOffset(AbstractToken token); + + /** + * Get index of the token that "contains" the given offset. *
+ * The result is in sync with TokenSequence.moveOffset(). + * + * @param offset offset for which the token index should be found. + * @return array of two items where [0] is the token's index and [1] is the token's start offset. *
+ * If offset >= last-token-end-offset then [0] contains token-count and + * [1] contains last-token-end-offset. *
+ * [0] may contain -1 to indicate that there are no tokens in the token list + * ([1] then contains zero). + */ + int[] tokenIndex(int offset); + + /** * Get total count of tokens in the list. *
* For token lists that create the tokens lazily @@ -154,11 +184,12 @@ *

* This is also used to check whether this token list corresponds to mutable input * or not because unmodifiable lists return -1 from this method. + *

* *

- * For branch token lists the {@link #updateStartOffsetShift()} ensures - * that the value returned by this method is most up-to-date - * (equals to the root list's one). + * For embedded token lists this value should be updated to the root's one + * prior to constructing a child token sequence. + *

* * @return number of modifications performed to the list. *
@@ -167,33 +198,14 @@ int modCount(); /** - * Get absolute offset of the child token with the given raw offset - * in the underlying input. - * - * @param rawOffset raw offset of the child token. - * @return absolute offset in the input. + * Get the root token list of the token list hierarchy. */ - int childTokenOffset(int rawOffset); + TokenList rootTokenList(); /** - * Get character of a token from the character sequence represented - * by this support. - * - * @param rawOffset raw offset of the child token. - * The given offset value may need to be preprocessed before using (it depends - * on a nature of the token list). - * @param index index inside the token's text that should be returned. - * This value cannot be simply added to the previous parameter - * for mutable token lists as the value could errorneously point - * into a middle of the offset gap then. - * @return appropriate character that the token has requested. + * Get text of the whole input source. */ - char childTokenCharAt(int rawOffset, int index); - - /** - * Get the root token list of the token list hierarchy. - */ - TokenList root(); + CharSequence inputSourceText(); /** * Get token hierarchy operation for this token list or null @@ -271,6 +283,9 @@ * If token filtering is used then the first token may start at higher offset. *
* It's guaranteed that there will be no token starting below this offset. + *
+ * For EmbeddedTokenList a EmbeddingContainer.updateStatus() must be called + * prior to calling this method to obtain up-to-date results. */ int startOffset(); @@ -280,16 +295,19 @@ * If token filtering is used then the last token may end at lower offset. *
* It's guaranteed that there will be no token ending above this offset. + *
+ * For EmbeddedTokenList a EmbeddingContainer.updateStatus() must be called + * prior to calling this method to obtain up-to-date results. */ int endOffset(); /** * Check if this token list is removed from token hierarchy. *
- * Should only be called under the lock of the root token list. + * Should only be called under a lock of a root token list. * * @return true if the token list was removed or false otherwise. */ boolean isRemoved(); - + } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TokenListList.java --- a/lexer/src/org/netbeans/lib/lexer/TokenListList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/TokenListList.java Wed May 28 14:48:55 2008 +0200 @@ -44,14 +44,18 @@ import java.util.List; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.editor.util.ArrayUtilities; import org.netbeans.lib.editor.util.GapList; -import static org.netbeans.lib.lexer.LexerUtilsConstants.INVALID_STATE; +import org.netbeans.lib.lexer.inc.TokenHierarchyEventInfo; /** * List of token lists that collects all token lists for a given language path. *
* There can be both lists with/without joining of the embedded sections. + * Non-joining TLL gets created when someone asks for TokenHierarchy.tokenSequenceList(). + * Joining TLL gets created if any of the embeddings for the particular language path + * has LanguageEmbedding.joinSections() set to true. *
* Initial implementation attempted to initialize the list of token lists lazily * upon asking for it by client. However there was a problem with fixing @@ -81,78 +85,67 @@ *
* Non-joining embedded token lists' contents will be lexed without token list list assistance. *
- * Joining embedded TLs will need TLL assistance so TLL instance gets created for them. - *
- * If there are mixed joining/non-joining language embedding instances for the same - * language path then the non-joining ones can possibly become initialized (lexed) - * without TLL if they are asked individually. Later when first joining embedding is found - * the token list list will be created and contain both joining and non-joining - * embeddings but the joinSections will be respected for individual lexing. - * Non-joining sections will be lexed individually and the join sections will be lexed as joined. + * JoinTokenList deals with the joining of sections. *

* * @author Miloslav Metelka */ -public final class TokenListList extends GapList> { +public final class TokenListList extends GapList> { - private final TokenHierarchyOperation operation; + private final TokenList rootTokenList; private final LanguagePath languagePath; - /** - * Whether this token list is holding joint sections embeddings. - *
- * If so it is mandatorily maintained. - */ private boolean joinSections; - + /** * Total count of children. It's maintained to quickly resolve * whether the list may be released. */ private int childrenCount; + - - public TokenListList(TokenHierarchyOperation operation, LanguagePath languagePath) { + public TokenListList(TokenList rootTokenList, LanguagePath languagePath) { super(4); - this.operation = operation; + this.rootTokenList = rootTokenList; this.languagePath = languagePath; // languagePath has size >= 2 assert (languagePath.size() >= 2); - Language language = languagePath.innerLanguage(); + Language language = LexerUtilsConstants.innerLanguage(languagePath); if (languagePath.size() > 2) { - Object relexState = null; - TokenListList parentTokenList = operation.tokenListList(languagePath.parent()); + TokenListList parentTokenList = rootTokenList.tokenHierarchyOperation().tokenListList(languagePath.parent()); for (int parentIndex = 0; parentIndex < parentTokenList.size(); parentIndex++) { TokenList tokenList = parentTokenList.get(parentIndex); - relexState = scanTokenList(tokenList, language, relexState); + scanTokenList(tokenList, language); } } else { // Parent is root token list - scanTokenList(operation.rootTokenList(), language, null); + scanTokenList(rootTokenList, language); + } + + if (joinSections) { + JoinTokenList.init(this, 0, size()); + } else { + // Init individual lists + for (EmbeddedTokenList etl : this) { + etl.initAllTokens(); + } } } - private Object scanTokenList(TokenList tokenList, Language language, Object relexState) { + private void scanTokenList(TokenList tokenList, Language language) { int tokenCount = tokenList.tokenCount(); for (int i = 0; i < tokenCount; i++) { - EmbeddedTokenList etl = EmbeddingContainer.embeddedTokenList( - tokenList, i, language); + // Check for embedded token list of the given language + EmbeddedTokenList etl = EmbeddingContainer.embeddedTokenList(tokenList, i, language, false); if (etl != null) { add(etl); if (etl.embedding().joinSections()) { - joinSections = true; - if (!etl.isInited()) { - etl.init(relexState); - relexState = LexerUtilsConstants.endState(etl, relexState); - } - } else { // Not joining sections -> next section starts with null state - relexState = null; + this.joinSections = true; } } } - return relexState; } public LanguagePath languagePath() { @@ -183,59 +176,42 @@ return (childrenCount > 0); } - public Object relexState(int index) { - // Find the previous non-empty section or non-joining section - Object relexState = INVALID_STATE; - for (int i = index - 1; i >= 0 && relexState == INVALID_STATE; i--) { - relexState = LexerUtilsConstants.endState(get(i)); - } - if (relexState == INVALID_STATE) // Start from real begining - relexState = null; - return relexState; - } - /** * Return a valid token list or null if the index is too high. */ - public EmbeddedTokenList getOrNull(int index) { + public EmbeddedTokenList getOrNull(int index) { return (index < size()) ? 
get(index) : null; } private static final EmbeddedTokenList[] EMPTY_TOKEN_LIST_ARRAY = new EmbeddedTokenList[0]; - public EmbeddedTokenList[] replace(int index, int removeCount, List> addTokenLists) { - EmbeddedTokenList[] removed; - if (removeCount > 0) { - removed = new EmbeddedTokenList[removeCount]; - copyElements(index, index + removeCount, removed, 0); - remove(index, removeCount); - } else { - removed = EMPTY_TOKEN_LIST_ARRAY; + public EmbeddedTokenList[] replace(int index, int removeTokenListCount, List> addTokenLists) { + @SuppressWarnings("unchecked") + EmbeddedTokenList[] removed = (removeTokenListCount > 0) + ? (EmbeddedTokenList[]) new EmbeddedTokenList[removeTokenListCount] + : (EmbeddedTokenList[]) EMPTY_TOKEN_LIST_ARRAY; + if (removeTokenListCount > 0) { + copyElements(index, index + removeTokenListCount, removed, 0); + remove(index, removeTokenListCount); } - @SuppressWarnings("unchecked") - List> etlLists = (List>)addTokenLists; - addAll(index, etlLists); + addAll(index, addTokenLists); return removed; + } + + public TokenList rootTokenList() { + return rootTokenList; } void childAdded() { throw new UnsupportedOperationException("Not yet implemented"); } - - TokenHierarchyOperation operation() { - return operation; - } - - int modCount() { - return operation.modCount(); - } - + public int findIndex(int offset) { int high = size() - 1; int low = 0; while (low <= high) { - int mid = (low + high) >> 1; - EmbeddedTokenList etl = get(mid); + int mid = (low + high) >>> 1; + EmbeddedTokenList etl = get(mid); // Ensure that the startOffset() will be updated etl.embeddingContainer().updateStatus(); int cmp = etl.startOffset() - offset; @@ -258,22 +234,15 @@ * were removed then these TLs then the token lists beyond the modification point * will be forced to update itself which may */ - public int findIndexDuringUpdate(EmbeddedTokenList targetEtl, int modOffset, int removedLength) { + public int findIndexDuringUpdate(EmbeddedTokenList targetEtl, TokenHierarchyEventInfo eventInfo) { int high = size() - 1; int low = 0; - int targetStartOffset = targetEtl.startOffset(); - if (targetStartOffset > modOffset && targetEtl.embeddingContainer().isRemoved()) { - targetStartOffset = Math.max(targetStartOffset - removedLength, modOffset); - } + int targetStartOffset = LexerUtilsConstants.updatedStartOffset(targetEtl, eventInfo); while (low <= high) { - int mid = (low + high) >> 1; - EmbeddedTokenList etl = get(mid); + int mid = (low + high) >>> 1; + EmbeddedTokenList etl = get(mid); // Ensure that the startOffset() will be updated - etl.embeddingContainer().updateStatusImpl(); - int startOffset = etl.startOffset(); - if (startOffset > modOffset && etl.embeddingContainer().isRemoved()) { - startOffset = Math.max(startOffset - removedLength, modOffset); - } + int startOffset = LexerUtilsConstants.updatedStartOffset(etl, eventInfo); int cmp = startOffset - targetStartOffset; if (cmp < 0) low = mid + 1; @@ -286,39 +255,26 @@ // In such case these need to be searched by linear search in both directions // from the found one. 
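The comment above describes the standard remedy when binary search lands on one of several entries sharing a key: probe the hit, then scan linearly in both directions while the key still matches. A self-contained illustration on plain arrays (hypothetical names; simplified from findIndexDuringUpdate, which compares update-adjusted start offsets instead of raw keys):

    // startOffsets is sorted but may contain runs of equal values;
    // returns the index of target, or -1 if absent at that key.
    static int findExact(int[] startOffsets, Object[] lists, int key, Object target) {
        int low = 0, high = startOffsets.length - 1;
        while (low <= high) {
            int mid = (low + high) >>> 1;              // >>> avoids int overflow, as in the patch
            if (startOffsets[mid] < key) low = mid + 1;
            else if (startOffsets[mid] > key) high = mid - 1;
            else {
                if (lists[mid] == target) return mid;
                for (int i = mid - 1; i >= 0 && startOffsets[i] == key; i--)
                    if (lists[i] == target) return i;  // scan down the equal run
                for (int i = mid + 1; i < lists.length && startOffsets[i] == key; i++)
                    if (lists[i] == target) return i;  // scan up the equal run
                return -1;
            }
        }
        return -1;
    }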
if (etl != targetEtl) { - low--; - while (low >= 0) { + while (--low >= 0) { etl = get(low); if (etl == targetEtl) { // Quick check for match return low; } // Check whether this was appropriate attempt for match - etl.embeddingContainer().updateStatusImpl(); - startOffset = etl.startOffset(); - if (startOffset > modOffset && etl.embeddingContainer().isRemoved()) { - startOffset = Math.max(startOffset - removedLength, modOffset); - } - if (startOffset != modOffset) + if (LexerUtilsConstants.updatedStartOffset(etl, eventInfo) != targetStartOffset) break; - low--; } // Go up from mid - low = mid + 1; - while (low < size()) { + low = mid; + while (++low < size()) { etl = get(low); if (etl == targetEtl) { // Quick check for match return low; } // Check whether this was appropriate attempt for match - etl.embeddingContainer().updateStatusImpl(); - startOffset = etl.startOffset(); - if (startOffset > modOffset && etl.embeddingContainer().isRemoved()) { - startOffset = Math.max(startOffset - removedLength, modOffset); - } - if (startOffset != modOffset) + if (LexerUtilsConstants.updatedStartOffset(etl, eventInfo) != targetStartOffset) break; - low++; } } break; @@ -331,8 +287,11 @@ // Check whether the token lists are in a right order int lastEndOffset = 0; for (int i = 0; i < size(); i++) { - EmbeddedTokenList etl = get(i); - etl.embeddingContainer().updateStatusImpl(); + EmbeddedTokenList etl = get(i); + etl.embeddingContainer().updateStatusUnsync(); + if (etl.isRemoved()) { + return "TOKEN-LIST-LIST Removed token list at index=" + i + '\n' + this; + } if (etl.startOffset() < lastEndOffset) { return "TOKEN-LIST-LIST Invalid start offset at index=" + i + ": etl[" + i + "].startOffset()=" + etl.startOffset() + @@ -353,6 +312,9 @@ } lastEndOffset = etl.endOffset(); } + if (joinSections()) { + return get(0).joinTokenList().checkConsistency(); + } return null; } @@ -370,10 +332,10 @@ sb.append('\n'); int digitCount = ArrayUtilities.digitCount(size()); for (int i = 0; i < size(); i++) { - EmbeddedTokenList etl = get(i); + EmbeddedTokenList etl = get(i); ArrayUtilities.appendBracketedIndex(sb, i, digitCount); etl.embeddingContainer().updateStatus(); - sb.append(etl.toStringHeader()); + etl.dumpInfo(sb); EmbeddingContainer ec = etl.embeddingContainer(); if (ec != null && ec.isRemoved()) { sb.append(", <--REMOVED-->"); diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TokenOrEmbedding.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/TokenOrEmbedding.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,70 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. 
Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer; + +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.lexer.token.AbstractToken; + +/** + * Type for having either token or embedding. + * + * @author Miloslav Metelka + */ + +public interface TokenOrEmbedding { + + /** + * Get token reference + * + * @return this if this is a token instance or + * a wrapped token if this is an embedding container. + */ + AbstractToken token(); + + /** + * Get non-null embedding container if this is embedding. + * + * @return non-null embedding or null if this is token. 
+ */ + EmbeddingContainer embedding(); + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/TokenSequenceList.java --- a/lexer/src/org/netbeans/lib/lexer/TokenSequenceList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/TokenSequenceList.java Wed May 28 14:48:55 2008 +0200 @@ -43,14 +43,12 @@ import java.util.AbstractList; import java.util.ArrayList; -import java.util.ArrayList; import java.util.Collections; import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.api.lexer.TokenId; import org.netbeans.api.lexer.TokenSequence; /** @@ -61,9 +59,9 @@ public final class TokenSequenceList extends AbstractList> { - private TokenHierarchyOperation operation; + private TokenList rootTokenList; - private final TokenListList tokenListList; + private final TokenListList tokenListList; private final List> tokenSequences; @@ -78,16 +76,15 @@ */ private int tokenListIndex; - public TokenSequenceList(TokenHierarchyOperation operation, LanguagePath languagePath, + public TokenSequenceList(TokenList rootTokenList, LanguagePath languagePath, int startOffset, int endOffset) { - this.operation = operation; + this.rootTokenList = rootTokenList; this.endOffset = endOffset; - this.expectedModCount = operation.modCount(); + this.expectedModCount = rootTokenList.modCount(); if (languagePath.size() == 1) { // Is supported too this.tokenListList = null; tokenListIndex = Integer.MAX_VALUE; // Mark no mods to tokenSequences - TokenList rootTokenList = operation.rootTokenList(); if (rootTokenList.languagePath() == languagePath) { TokenSequence rootTS = LexerApiPackageAccessor.get().createTokenSequence( checkWrapTokenList(rootTokenList, startOffset, endOffset)); @@ -97,7 +94,7 @@ } } else { // languagePath.size() >= 2 - this.tokenListList = operation.tokenListList(languagePath); + this.tokenListList = rootTokenList.tokenHierarchyOperation().tokenListList(languagePath); // Possibly skip initial token lists according to startOffset int size = tokenListList.size(); int high = size - 1; @@ -108,7 +105,7 @@ int mid = (tokenListIndex + high) / 2; EmbeddedTokenList etl = tokenListList.get(mid); // Update end offset before querying - etl.embeddingContainer().updateStatusImpl(); + etl.embeddingContainer().updateStatusUnsync(); int tlEndOffset = etl.endOffset(); // updateStatusUnsync() just called if (tlEndOffset < startOffset) { tokenListIndex = mid + 1; @@ -123,7 +120,7 @@ firstTokenList = tokenListList.getOrNull(tokenListIndex); if (tokenListIndex == size) { // Right above the ones that existed at beginning of bin search while (firstTokenList != null) { - firstTokenList.embeddingContainer().updateStatusImpl(); + firstTokenList.embeddingContainer().updateStatusUnsync(); if (firstTokenList.endOffset() >= startOffset) { // updateStatusUnsync() just called break; } @@ -136,7 +133,7 @@ } if (firstTokenList != null) { - firstTokenList.embeddingContainer().updateStatusImpl(); + firstTokenList.embeddingContainer().updateStatusUnsync(); tokenSequences = new ArrayList>(4); tokenSequences.add(LexerApiPackageAccessor.get().createTokenSequence( checkWrapTokenList(firstTokenList, startOffset, endOffset))); @@ -207,10 +204,10 @@ } void checkForComodification() { - if (expectedModCount != operation.modCount()) + if (expectedModCount != rootTokenList.modCount()) throw new ConcurrentModificationException( "Caller uses obsolete TokenSequenceList: expectedModCount=" +
expectedModCount + // NOI18N - " != modCount=" + operation.modCount() + " != modCount=" + rootTokenList.modCount() ); } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/batch/BatchTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/batch/BatchTokenList.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,255 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer.batch; - -import java.util.ArrayList; -import java.util.Set; -import org.netbeans.api.lexer.Language; -import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.lib.lexer.EmbeddingContainer; -import org.netbeans.lib.lexer.LAState; -import org.netbeans.lib.lexer.TokenList; -import org.netbeans.lib.lexer.LexerInputOperation; -import org.netbeans.lib.lexer.LexerUtilsConstants; -import org.netbeans.api.lexer.InputAttributes; -import org.netbeans.api.lexer.Token; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.TokenHierarchyOperation; -import org.netbeans.lib.lexer.token.AbstractToken; -import org.netbeans.lib.lexer.token.TextToken; - - -/** - * Token list used for root list for immutable inputs. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public abstract class BatchTokenList -extends ArrayList implements TokenList { - - /** Flag for additional correctness checks (may degrade performance). 
*/ - private static final boolean testing = Boolean.getBoolean("netbeans.debug.lexer.test"); - - private static boolean maintainLAState; - - /** - * Check whether lookaheads and states are stored for testing purposes. - */ - public static boolean isMaintainLAState() { - return maintainLAState; - } - - public static void setMaintainLAState(boolean maintainLAState) { - BatchTokenList.maintainLAState = maintainLAState; - } - - private final TokenHierarchyOperation tokenHierarchyOperation; - - private final LanguagePath languagePath; - - private final Set skipTokenIds; - - private final InputAttributes inputAttributes; - - /** - * Lexer input used for lexing of the input. - */ - private LexerInputOperation lexerInputOperation; - - private LAState laState; - - private boolean inited; - - - public BatchTokenList(TokenHierarchyOperation tokenHierarchyOperation, - Language language, Set skipTokenIds, InputAttributes inputAttributes) { - this.tokenHierarchyOperation = tokenHierarchyOperation; - this.languagePath = LanguagePath.get(language); - this.skipTokenIds = skipTokenIds; - this.inputAttributes = inputAttributes; - if (testing) { // Maintain lookaheads and states when in test environment - laState = LAState.empty(); - } - } - - public abstract char childTokenCharAt(int rawOffset, int index); - - protected abstract LexerInputOperation createLexerInputOperation(); - - protected void init() { - lexerInputOperation = createLexerInputOperation(); - } - - public TokenList root() { - return this; // this list should always be the root list of the token hierarchy - } - - public TokenHierarchyOperation tokenHierarchyOperation() { - return tokenHierarchyOperation; - } - - public LanguagePath languagePath() { - return languagePath; - } - - public synchronized int tokenCount() { - if (!inited) { - init(); - inited = true; - } - if (lexerInputOperation != null) { // still lexing - tokenOrEmbeddingContainerImpl(Integer.MAX_VALUE); - } - return size(); - } - - public int tokenCountCurrent() { - return size(); - } - - public int childTokenOffset(int rawOffset) { - // Children offsets should be absolute - return rawOffset; - } - - public int tokenOffset(int index) { - Token token = existingToken(index); - int offset; - if (token.isFlyweight()) { - offset = 0; - while (--index >= 0) { - token = existingToken(index); - offset += token.length(); - if (!token.isFlyweight()) { - offset += token.offset(null); - break; - } - } - } else { // non-flyweight offset - offset = token.offset(null); - } - return offset; - } - - public synchronized Object tokenOrEmbeddingContainer(int index) { - return tokenOrEmbeddingContainerImpl(index); - } - - private Object tokenOrEmbeddingContainerImpl(int index) { - if (!inited) { - init(); - inited = true; - } - while (lexerInputOperation != null && index >= size()) { - Token token = lexerInputOperation.nextToken(); - if (token != null) { // lexer returned valid token - add(token); - if (laState != null) { // maintaining lookaheads and states - laState = laState.add(lexerInputOperation.lookahead(), - lexerInputOperation.lexerState()); - } - } else { // no more tokens from lexer - lexerInputOperation.release(); - lexerInputOperation = null; - trimToSize(); - } - } - return (index < size()) ? get(index) : null; - } - - private Token existingToken(int index) { - return LexerUtilsConstants.token(get(index)); - } - - public int lookahead(int index) { - return (laState != null) ? laState.lookahead(index) : -1; - } - - public Object state(int index) { - return (laState != null) ? 
laState.state(index) : null; - } - - public int startOffset() { - return 0; - } - - public int endOffset() { - int cntM1 = tokenCount() - 1; - if (cntM1 >= 0) - return tokenOffset(cntM1) + LexerUtilsConstants.token(this, cntM1).length(); - return 0; - } - - public boolean isRemoved() { - return false; - } - - public int modCount() { - return -1; // immutable input - } - - public synchronized AbstractToken replaceFlyToken( - int index, AbstractToken flyToken, int offset) { - TextToken nonFlyToken = ((TextToken)flyToken).createCopy(this, offset); - set(index, nonFlyToken); - return nonFlyToken; - } - - public void wrapToken(int index, EmbeddingContainer embeddingContainer) { - set(index, embeddingContainer); - } - - public InputAttributes inputAttributes() { - return inputAttributes; - } - - public boolean isContinuous() { - return (skipTokenIds == null); - } - - public Set skipTokenIds() { - return skipTokenIds; - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/batch/CopyTextTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/batch/CopyTextTokenList.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. 
- */ - -package org.netbeans.lib.lexer.batch; - -import java.io.Reader; -import java.util.Set; -import org.netbeans.api.lexer.Language; -import org.netbeans.lib.lexer.LexerInputOperation; -import org.netbeans.api.lexer.InputAttributes; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.TokenHierarchyOperation; - - -/** - * Token list for situation when the input text must be copied. - * It works together with SkimTokenList instances that act - * as a filter over this token list. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public final class CopyTextTokenList extends BatchTokenList { - - /** Either reader or char sequence */ - private final Object input; - - public CopyTextTokenList(TokenHierarchyOperation tokenHierarchyOperation, Reader inputReader, - Language language, Set skipTokenIds, InputAttributes inputAttributes) { - super(tokenHierarchyOperation, language, skipTokenIds, inputAttributes); - this.input = inputReader; - } - - public CopyTextTokenList(TokenHierarchyOperation tokenHierarchyOperation, CharSequence inputText, - Language language, Set skipTokenIds, InputAttributes inputAttributes) { - super(tokenHierarchyOperation, language, skipTokenIds, inputAttributes); - this.input = inputText; - } - - public int childTokenOffset(int rawOffset) { - // Cluster should be used so this method should never be called - throwShouldNeverBeCalled(); - return 0; // never reached - } - - public char childTokenCharAt(int rawOffset, int index) { - // Cluster should be used so this method should never be called - throwShouldNeverBeCalled(); - return ' '; // never reached - } - - private void throwShouldNeverBeCalled() { - throw new IllegalStateException("Should never be called"); // NOI18N - } - - protected LexerInputOperation createLexerInputOperation() { - return (input instanceof Reader) - ? new SkimLexerInputOperation(this, (Reader)input) - : new SkimLexerInputOperation(this, (CharSequence)input); - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/batch/SkimLexerInputOperation.java --- a/lexer/src/org/netbeans/lib/lexer/batch/SkimLexerInputOperation.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,316 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. 
The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer.batch; - -import java.io.IOException; -import java.io.Reader; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.LexerInputOperation; -import org.netbeans.lib.lexer.TokenList; -import org.netbeans.lib.lexer.token.AbstractToken; -import org.netbeans.spi.lexer.LexerInput; - -/** - * Lexer input operation over a {@link java.io.Reader}. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public final class SkimLexerInputOperation extends LexerInputOperation { - - private static final char[] EMPTY_CHAR_ARRAY = new char[0]; - - /** - * Default size for reading char array. - */ - private static final int DEFAULT_READ_CHAR_ARRAY_SIZE = 4096; - - /** - * Minimum size to be read (to have space for reading). - */ - private static final int MIN_READ_SIZE = 512; - - private static final int DEFAULT_CLUSTER_SIZE = 4096; - - /** - * Maximum fragmentation factor for token character arrays. - *
- If there is not enough space in the tokenCharArray - * to copy a token's characters, and the token's length - * exceeds this threshold, then the token gets - * an extra character buffer just for itself and there is - * still a chance to use the present tokenCharArray for - * shorter tokens. - */ - private static final int MAX_UNUSED_CLUSTER_SIZE_FRACTION = 50; - - - /** - * Reader as a primary source of characters that are further - * copied and cached. - */ - private Reader reader; - - /** - * Array holding the read characters. - */ - private char[] readCharArray; - - /** - * Character sequence holding the characters to be read. - */ - private CharSequence readCharSequence; - - /** - * Index of the first character in the token being currently recognized. - */ - private int readStartIndex; - - /** - * End of valid chars in readCharArray (points to first invalid char). - */ - private int readEndIndex; - - /** - * Whether EOF was read from reader already or not. - */ - private boolean eofRead; - - /** - * Actual token cluster where the tokens are being placed. - */ - private SkimTokenList cluster; - - private int clusterTextEndIndex; - - private int defaultClusterSize = DEFAULT_CLUSTER_SIZE; - - /** - * Starting offset of the cluster currently being used. - */ - private int clusterStartOffset; - - /** - * How much the offset is ahead of the token's text offset - * in the cluster. The tokens that get skipped and flyweight tokens - * increase this value because their text is not physically copied - * into the cluster's character data but they increase the offset. - */ - private int offsetShift; - - public SkimLexerInputOperation(TokenList tokenList, Reader reader) { - super(tokenList, 0, null); - this.reader = reader; - this.readCharArray = new char[DEFAULT_READ_CHAR_ARRAY_SIZE]; - } - - public SkimLexerInputOperation(TokenList tokenList, CharSequence readCharSequence) { - super(tokenList, 0, null); - this.readCharSequence = readCharSequence; - this.readEndIndex = readCharSequence.length(); - } - - public int read(int index) { // index >= 0 is guaranteed by contract - index += readStartIndex; - if (index < readEndIndex) { - return (readCharArray != null) - ? readCharArray[index] - : readCharSequence.charAt(index); - - } else { // must read next or return EOF - if (!eofRead) { - eofRead = (readCharArray != null) - ? readNextCharArray() - : true; // using readCharSequence -> no more chars - - return read(index); - - } else { - return LexerInput.EOF; - } - } - } - - public char readExisting(int index) { - return (readCharArray != null) - ?
readCharArray[index] - : readCharSequence.charAt(index); - } - - public void approveToken(AbstractToken token) { - int tokenLength = token.length(); - if (isSkipToken(token)) { - preventFlyToken(); - skipChars(tokenLength()); - - } else if (token.isFlyweight()) { - assert isFlyTokenAllowed(); - flyTokenAdded(); - skipChars(tokenLength); - - } else { // non-flyweight token => must be L0Token instance - if (clusterTextEndIndex != 0) { // valid cluster exists - // Check whether token fits into cluster's char array - if (tokenLength + clusterTextEndIndex > cluster.getText().length) { - // Cannot fit the token's text into current cluster - finishCluster(); - } - } - - if (clusterTextEndIndex == 0) { // allocate new cluster - int clusterSize = defaultClusterSize; - if (clusterSize < tokenLength) { // cluster just for one token - clusterSize = tokenLength; - } - defaultClusterSize = clusterSize; - cluster = new SkimTokenList((CopyTextTokenList)tokenList(), - clusterStartOffset, new char[clusterSize]); - } - - // Now it's clear that the token will fit into the cluster's text - // TODO for DirectCharSequence use more efficient way - char[] clusterText = cluster.getText(); - if (readCharArray != null) { - System.arraycopy(readCharArray, readStartIndex, clusterText, - clusterTextEndIndex, tokenLength); - } else { // using readCharSequence - for (int i = 0; i < tokenLength; i++) { - clusterText[clusterTextEndIndex + i] - = readCharSequence.charAt(readStartIndex + i); - } - } - - int rawOffset = (offsetShift << 16) | clusterTextEndIndex; - token.setTokenList(cluster); - token.setRawOffset(rawOffset); - clusterTextEndIndex += tokenLength; - clearFlySequence(); - } - - readStartIndex += tokenLength; - tokenApproved(); - } - - private void skipChars(int skipLength) { - if (clusterTextEndIndex != 0) { // cluster already populated - if (offsetShift + skipLength > Short.MAX_VALUE) { - // Cannot advance offset shift without overflowing -> cluster is finished - finishCluster(); - clusterStartOffset += skipLength; - - } else { // relOffset will fit into current cluster - offsetShift += skipLength; - } - - } else { // cluster is null -> can shift cluster's start offset - clusterStartOffset += skipLength; - } - } - - public void finish() { - if (clusterTextEndIndex != 0) { - finishCluster(); - } - } - - private void finishCluster() { - // If there would be too much unused space in the cluster's char array - // then it will be reallocated. 
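Note (editor's sketch, not part of the patch): the skim clusters being removed here pack two 16-bit values into each token's int raw offset; approveToken() above encodes it and SkimTokenList.childTokenOffset() below decodes it. A minimal sketch of that packing, with hypothetical helper names:

    static int packRawOffset(int offsetShift, int clusterTextIndex) {
        // offsetShift is kept <= Short.MAX_VALUE by skipChars()/finishCluster()
        // and the cluster text index fits into 16 bits, so both halves
        // coexist in one int.
        return (offsetShift << 16) | clusterTextIndex;
    }

    static int realOffset(int clusterStartOffset, int rawOffset) {
        int offsetShift = rawOffset >> 16;  // high 16 bits: skipped/flyweight text length
        int textIndex = rawOffset & 0xFFFF; // low 16 bits: index into cluster text
        return clusterStartOffset + textIndex + offsetShift;
    }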
- int clusterTextLength = cluster.getText().length; - if (clusterTextLength / MAX_UNUSED_CLUSTER_SIZE_FRACTION - > (clusterTextLength - clusterTextEndIndex) - ) { // Fragmentation -> reallocate cluster's char array - char[] newText = new char[clusterTextEndIndex]; - System.arraycopy(cluster.getText(), 0, newText, 0, clusterTextEndIndex); - cluster.setText(newText); - } - clusterStartOffset += clusterTextEndIndex + offsetShift; - clusterTextEndIndex = 0; - offsetShift = 0; - cluster = null; // cluster no longer valid - } - - private boolean readNextCharArray() { - // Copy everything from present readStartIndex till readEndIndex - int retainLength = readEndIndex - readStartIndex; - int minReadSize = readCharArray.length - retainLength; - char[] newReadCharArray = readCharArray; // by default take original one - if (minReadSize < MIN_READ_SIZE) { // allocate new - // double the current array's size - newReadCharArray = new char[readCharArray.length * 2]; - } - System.arraycopy(readCharArray, readStartIndex, newReadCharArray, 0, retainLength); - readCharArray = newReadCharArray; - readStartIndex = 0; - readEndIndex = retainLength; - - boolean eof = false; - while (readEndIndex < readCharArray.length) { - int readSize; - try { - readSize = reader.read(readCharArray, readEndIndex, - readCharArray.length - readEndIndex); - } catch (IOException e) { - // The exception is silently ignored here - // This should generally not happen - a wrapping reader - // should be used that will catch and process the IO exceptions. - readSize = -1; - } - if (readSize == -1) { - eof = true; - try { - reader.close(); - } catch (IOException e) { - // The exception is silently ignored here - // This should generally not happen - a wrapping reader - // should be used that will catch and process the IO exceptions. - } - break; - } else { - readEndIndex += readSize; - } - } - return eof; - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/batch/SkimTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/batch/SkimTokenList.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,178 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. 
All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer.batch; - -import java.util.Set; -import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.lib.lexer.EmbeddingContainer; -import org.netbeans.lib.lexer.TokenList; -import org.netbeans.api.lexer.InputAttributes; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.TokenHierarchyOperation; -import org.netbeans.lib.lexer.token.AbstractToken; - -/** - * Filtering token list constructed over character array with an independent - * start offset value. - *
- It is constructed for batch inputs and implements - * a token list, but it only provides translation of raw offsets - * into real offsets and retrieval of the characters of token bodies. *
- * Other operations are delegated to an original - * token list that really holds the tokens. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public final class SkimTokenList implements TokenList { - - private CopyTextTokenList tokenList; - - private int startOffset; - - private char[] text; - - - public SkimTokenList(CopyTextTokenList tokenList, int startOffset, char[] text) { - this.tokenList = tokenList; - this.startOffset = startOffset; - this.text = text; - } - - public CopyTextTokenList getTokenList() { - return tokenList; - } - - public int startOffset() { - return tokenList.startOffset(); - } - - public int endOffset() { - return tokenList.endOffset(); - } - - public boolean isRemoved() { - return tokenList.isRemoved(); - } - - char[] getText() { - return text; - } - - void setText(char[] text) { - this.text = text; - } - - public int childTokenOffset(int rawOffset) { - int offsetShift = (rawOffset >> 16); - return startOffset + (rawOffset & 0xFFFF) + offsetShift; - } - - public char childTokenCharAt(int rawOffset, int index) { - return text[((rawOffset + index) & 0xFFFF)]; - } - - public int modCount() { - return 0; - } - - public Object tokenOrEmbeddingContainer(int index) { - return tokenList.tokenOrEmbeddingContainer(index); - } - - public AbstractToken replaceFlyToken( - int index, AbstractToken flyToken, int offset) { - return tokenList.replaceFlyToken(index, flyToken, offset); - } - - - public int lookahead(int index) { - return tokenList.lookahead(index); - } - - public Object state(int index) { - return tokenList.state(index); - } - - public int tokenOffset(int index) { - return tokenList.tokenOffset(index); - } - - public int tokenCount() { - return tokenList.tokenCount(); - } - - public int tokenCountCurrent() { - return tokenList.tokenCountCurrent(); - } - - public TokenList root() { - return tokenList.root(); - } - - public TokenHierarchyOperation tokenHierarchyOperation() { - return tokenList.tokenHierarchyOperation(); - } - - public LanguagePath languagePath() { - return tokenList.languagePath(); - } - - public void wrapToken(int index, EmbeddingContainer embeddingContainer) { - tokenList.wrapToken(index, embeddingContainer); - } - - public InputAttributes inputAttributes() { - return tokenList.inputAttributes(); - } - - public boolean isContinuous() { - return tokenList.isContinuous(); - } - - public Set skipTokenIds() { - return tokenList.skipTokenIds(); - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/batch/TextTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/batch/TextTokenList.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. 
Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer.batch; - -import java.util.Set; -import org.netbeans.api.lexer.Language; -import org.netbeans.lib.lexer.LexerInputOperation; -import org.netbeans.lib.lexer.TextLexerInputOperation; -import org.netbeans.api.lexer.InputAttributes; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.TokenHierarchyOperation; - - -/** - * Batch token list over text expressed as character sequence. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public final class TextTokenList extends BatchTokenList { - - private CharSequence inputText; - - public TextTokenList(TokenHierarchyOperation tokenHierarchyOperation, CharSequence inputText, - Language language, Set skipTokenIds, InputAttributes inputAttributes) { - super(tokenHierarchyOperation, language, skipTokenIds, inputAttributes); - this.inputText = inputText; - } - - public char childTokenCharAt(int rawOffset, int index) { - return inputText.charAt(rawOffset + index); // rawOffset is absolute - } - - protected LexerInputOperation createLexerInputOperation() { - return new TextLexerInputOperation(this, inputText); - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/DocumentInput.java --- a/lexer/src/org/netbeans/lib/lexer/inc/DocumentInput.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/inc/DocumentInput.java Wed May 28 14:48:55 2008 +0200 @@ -48,7 +48,6 @@ import javax.swing.text.Document; import org.netbeans.api.lexer.InputAttributes; import org.netbeans.api.lexer.Language; -import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.editor.util.swing.DocumentListenerPriority; import org.netbeans.lib.editor.util.swing.DocumentUtilities; import org.netbeans.lib.lexer.LanguageManager; diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/FilterSnapshotTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/inc/FilterSnapshotTokenList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/inc/FilterSnapshotTokenList.java Wed May 28 14:48:55 2008 +0200 @@ -46,8 +46,10 @@ import org.netbeans.api.lexer.LanguagePath; import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.lexer.EmbeddingContainer; +import 
org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.lib.lexer.TokenHierarchyOperation; import org.netbeans.lib.lexer.TokenList; +import org.netbeans.lib.lexer.TokenOrEmbedding; import org.netbeans.lib.lexer.token.AbstractToken; /** @@ -99,20 +101,20 @@ return tokenOffsetDiff; } - public Object tokenOrEmbeddingContainer(int index) { - return tokenList.tokenOrEmbeddingContainer(index); + public TokenOrEmbedding tokenOrEmbedding(int index) { + return tokenList.tokenOrEmbedding(index); } public AbstractToken replaceFlyToken(int index, AbstractToken flyToken, int offset) { return tokenList.replaceFlyToken(index, flyToken, offset); } - public int tokenOffset(int index) { - return tokenOffsetDiff + tokenList.tokenOffset(index); + public int tokenOffsetByIndex(int index) { + return tokenOffsetDiff + tokenList.tokenOffsetByIndex(index); } public int modCount() { - return -1; + return LexerUtilsConstants.MOD_COUNT_IMMUTABLE_INPUT; } public int tokenCount() { @@ -127,11 +129,15 @@ return tokenList.languagePath(); } - public int childTokenOffset(int rawOffset) { - throw new IllegalStateException("Unexpected call."); + public int tokenOffset(AbstractToken token) { + return tokenList.tokenOffset(token); } - public char childTokenCharAt(int rawOffset, int index) { + public int[] tokenIndex(int offset) { + return LexerUtilsConstants.tokenIndexBinSearch(this, offset, tokenCount()); + } + + public char charAt(int offset) { throw new IllegalStateException("Unexpected call."); } @@ -139,10 +145,14 @@ tokenList.wrapToken(index, embeddingContainer); } - public TokenList root() { - return tokenList.root(); + public TokenList rootTokenList() { + return tokenList.rootTokenList(); } - + + public CharSequence inputSourceText() { + return rootTokenList().inputSourceText(); + } + public TokenHierarchyOperation tokenHierarchyOperation() { return tokenList.tokenHierarchyOperation(); } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/IncTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/inc/IncTokenList.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/inc/IncTokenList.java Wed May 28 14:48:55 2008 +0200 @@ -54,11 +54,11 @@ import org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.api.lexer.InputAttributes; import org.netbeans.api.lexer.Language; -import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.lexer.TokenHierarchyOperation; import org.netbeans.lib.lexer.token.AbstractToken; import org.netbeans.lib.lexer.token.TextToken; +import org.netbeans.lib.lexer.TokenOrEmbedding; import org.netbeans.spi.lexer.MutableTextInput; @@ -80,13 +80,13 @@ */ public final class IncTokenList -extends FlyOffsetGapList implements MutableTokenList { +extends FlyOffsetGapList> implements MutableTokenList { private final TokenHierarchyOperation tokenHierarchyOperation; private LanguagePath languagePath; - private CharSequence text; + private CharSequence inputSourceText; /** * Lexer input operation used for lexing of the input. 
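Note (editor's sketch, not part of the patch): the hunks in this area replace untyped Object slots, which required getClass() checks against EmbeddingContainer.class, with the TokenOrEmbedding union introduced earlier in this changeset. A caller can then branch without casts, mirroring the isElementFlyweight() rewrite below:

    static <T extends TokenId> boolean isFlyweight(TokenOrEmbedding<T> e) {
        // An embedding container always wraps a non-flyweight token, so a
        // slot can only be flyweight when it is a plain token, i.e. when
        // embedding() returns null.
        return e.embedding() == null && e.token().isFlyweight();
    }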
@@ -109,30 +109,22 @@ public void reinit() { if (languagePath != null) { MutableTextInput input = tokenHierarchyOperation.mutableTextInput(); - this.text = LexerSpiPackageAccessor.get().text(input); - this.lexerInputOperation = new TextLexerInputOperation(this, text); + this.inputSourceText = LexerSpiPackageAccessor.get().text(input); + this.lexerInputOperation = new TextLexerInputOperation(this); } else { - this.text = null; + this.inputSourceText = null; releaseLexerInputOperation(); } this.laState = LAState.empty(); } - private void releaseLexerInputOperation() { - if (lexerInputOperation != null) + public void releaseLexerInputOperation() { + if (lexerInputOperation != null) { lexerInputOperation.release(); + lexerInputOperation = null; + } } - public void refreshLexerInputOperation() { - releaseLexerInputOperation(); - int lastTokenIndex = tokenCountCurrent() - 1; - lexerInputOperation = createLexerInputOperation( - lastTokenIndex + 1, - existingTokensEndOffset(), - (lastTokenIndex >= 0) ? state(lastTokenIndex) : null - ); - } - public LanguagePath languagePath() { return languagePath; } @@ -152,28 +144,24 @@ public synchronized int tokenCount() { if (lexerInputOperation != null) { // still lexing - tokenOrEmbeddingContainerImpl(Integer.MAX_VALUE); + tokenOrEmbeddingImpl(Integer.MAX_VALUE); } return size(); } - public char childTokenCharAt(int rawOffset, int index) { - index += childTokenOffset(rawOffset); - return text.charAt(index); - } - - public int childTokenOffset(int rawOffset) { + public int tokenOffset(AbstractToken token) { + int rawOffset = token.rawOffset(); return (rawOffset < offsetGapStart() ? rawOffset : rawOffset - offsetGapLength()); } - - public int tokenOffset(int index) { + + public int tokenOffsetByIndex(int index) { return elementOffset(index); } - public int existingTokensEndOffset() { - return elementOrEndOffset(tokenCountCurrent()); + public int[] tokenIndex(int offset) { + return LexerUtilsConstants.tokenIndexLazyTokenCreation(this, offset); } /** @@ -188,21 +176,20 @@ rootModCount++; } - public synchronized Object tokenOrEmbeddingContainer(int index) { - return tokenOrEmbeddingContainerImpl(index); + public synchronized TokenOrEmbedding tokenOrEmbedding(int index) { + return tokenOrEmbeddingImpl(index); } - private Object tokenOrEmbeddingContainerImpl(int index) { + private TokenOrEmbedding tokenOrEmbeddingImpl(int index) { while (lexerInputOperation != null && index >= size()) { - Token token = lexerInputOperation.nextToken(); + AbstractToken token = lexerInputOperation.nextToken(); if (token != null) { // lexer returned valid token updateElementOffsetAdd(token); add(token); laState = laState.add(lexerInputOperation.lookahead(), lexerInputOperation.lexerState()); } else { // no more tokens from lexer - lexerInputOperation.release(); - lexerInputOperation = null; + releaseLexerInputOperation(); trimToSize(); laState.trimToSize(); } @@ -217,7 +204,7 @@ return nonFlyToken; } - public synchronized void wrapToken(int index, EmbeddingContainer embeddingContainer) { + public synchronized void wrapToken(int index, EmbeddingContainer embeddingContainer) { set(index, embeddingContainer); } @@ -225,36 +212,30 @@ return LexerSpiPackageAccessor.get().inputAttributes(tokenHierarchyOperation.mutableTextInput()); } - protected int elementRawOffset(Object elem) { - return LexerUtilsConstants.token(elem).rawOffset(); + protected int elementRawOffset(TokenOrEmbedding elem) { + return elem.token().rawOffset(); } - protected void setElementRawOffset(Object elem, int rawOffset) 
{ - LexerUtilsConstants.token(elem).setRawOffset(rawOffset); + protected void setElementRawOffset(TokenOrEmbedding elem, int rawOffset) { + elem.token().setRawOffset(rawOffset); } - protected boolean isElementFlyweight(Object elem) { + protected boolean isElementFlyweight(TokenOrEmbedding elem) { // token wrapper always contains non-flyweight token - return (elem.getClass() != EmbeddingContainer.class) - && ((AbstractToken)elem).isFlyweight(); + return (elem.embedding() == null) + && elem.token().isFlyweight(); } - protected int elementLength(Object elem) { - return LexerUtilsConstants.token(elem).length(); + protected int elementLength(TokenOrEmbedding elem) { + return elem.token().length(); } - private AbstractToken existingToken(int index) { - // Must use synced tokenOrEmbeddingContainer() because of possible change - // of the underlying list impl when adding lazily requested tokens - return LexerUtilsConstants.token(tokenOrEmbeddingContainer(index)); - } - - public Object tokenOrEmbeddingContainerUnsync(int index) { + public TokenOrEmbedding tokenOrEmbeddingUnsync(int index) { // Solely for token list updater or token hierarchy snapshots // having single-threaded exclusive write access return get(index); } - + public int lookahead(int index) { return laState.lookahead(index); } @@ -267,8 +248,12 @@ return size(); } - public TokenList root() { + public TokenList rootTokenList() { return this; + } + + public CharSequence inputSourceText() { + return inputSourceText; } public TokenHierarchyOperation tokenHierarchyOperation() { @@ -277,74 +262,89 @@ public LexerInputOperation createLexerInputOperation( int tokenIndex, int relexOffset, Object relexState) { + // Possibly release unfinished lexing - will be restarted in replaceTokens() + // Releasing the lexer now allows to share a single backing lexer's impl instance better. + // Do not assign null to lexerInputOperation since the replaceTokens() would not know + // that the lexing was unfinished. + if (lexerInputOperation != null) + lexerInputOperation.release(); + // Used for mutable lists only so maintain LA and state return new TextLexerInputOperation(this, tokenIndex, relexState, - text, 0, relexOffset, text.length()); + relexOffset, inputSourceText.length()); } public boolean isFullyLexed() { return (lexerInputOperation == null); } - public void replaceTokens(TokenListChange change, int removeTokenCount, int diffLength) { + public void replaceTokens(TokenListChange change, int diffLength) { int index = change.index(); // Remove obsolete tokens (original offsets are retained) - Object[] removedTokensOrEmbeddingContainers = new Object[removeTokenCount]; - copyElements(index, index + removeTokenCount, removedTokensOrEmbeddingContainers, 0); - int offset = change.offset(); - for (int i = 0; i < removeTokenCount; i++) { - Object tokenOrEmbeddingContainer = removedTokensOrEmbeddingContainers[i]; - AbstractToken token; - // It's necessary to update-status of all removed tokens' contained embeddings - // since otherwise (if they would not be up-to-date) they could not be updated later - // as they lose their parent token list which the update-status relies on. 
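Note (editor's sketch, not part of the patch): tokenOffset(AbstractToken) above translates a stored raw offset through the offset gap inherited from FlyOffsetGapList. A minimal sketch of the translation, assuming gap-start and gap-length bookkeeping as in OffsetGapList:

    static int realOffset(int rawOffset, int offsetGapStart, int offsetGapLength) {
        // Raw offsets below the gap are stored as-is; those at or above it
        // are stored shifted up by the gap length. An edit only moves the gap
        // and adjusts its length, so most token offsets never need rewriting.
        return (rawOffset < offsetGapStart) ? rawOffset : rawOffset - offsetGapLength;
    }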
- if (tokenOrEmbeddingContainer.getClass() == EmbeddingContainer.class) { - EmbeddingContainer ec = (EmbeddingContainer)tokenOrEmbeddingContainer; - ec.updateStatusAndInvalidate(); - token = ec.token(); - } else { // Regular token - token = (AbstractToken)tokenOrEmbeddingContainer; + int removeTokenCount = change.removedTokenCount(); + AbstractToken firstRemovedToken = null; + if (removeTokenCount > 0) { + @SuppressWarnings("unchecked") + TokenOrEmbedding[] removedTokensOrEmbeddings = new TokenOrEmbedding[removeTokenCount]; + copyElements(index, index + removeTokenCount, removedTokensOrEmbeddings, 0); + firstRemovedToken = removedTokensOrEmbeddings[0].token(); + for (int i = 0; i < removeTokenCount; i++) { + TokenOrEmbedding tokenOrEmbedding = removedTokensOrEmbeddings[i]; + // It's necessary to update-status of all removed tokens' contained embeddings + // since otherwise (if they would not be up-to-date) they could not be updated later + // as they lose their parent token list which the update-status relies on. + EmbeddingContainer ec = tokenOrEmbedding.embedding(); + if (ec != null) { + assert (ec.cachedModCount() != rootModCount) : "ModCount already updated"; // NOI18N + ec.updateStatusUnsyncAndMarkRemoved(); + } + AbstractToken token = tokenOrEmbedding.token(); + if (!token.isFlyweight()) { + updateElementOffsetRemove(token); + token.setTokenList(null); + } } - if (!token.isFlyweight()) { - updateElementOffsetRemove(token); - token.setTokenList(null); - } - offset += token.length(); + remove(index, removeTokenCount); // Retain original offsets + laState.remove(index, removeTokenCount); // Remove lookaheads and states + change.setRemovedTokens(removedTokensOrEmbeddings); } - remove(index, removeTokenCount); // Retain original offsets - laState.remove(index, removeTokenCount); // Remove lookaheads and states - change.setRemovedTokens(removedTokensOrEmbeddingContainers); - change.setRemovedEndOffset(offset); // Move and fix the gap according to the performed modification. + // Instead of modOffset the gap is located at first relexed token's start + // because then the already precomputed index corresponding to the given offset + // can be reused. Otherwise there would have to be another binary search for index. if (offsetGapStart() != change.offset()) { // Minimum of the index of the first removed index and original computed index - moveOffsetGap(change.offset(), Math.min(index, change.offsetGapIndex())); + moveOffsetGap(change.offset(), change.index()); } updateOffsetGapLength(-diffLength); // Add created tokens. 
- List addedTokensOrBranches = change.addedTokensOrBranches(); - if (addedTokensOrBranches != null) { - for (Object tokenOrBranch : addedTokensOrBranches) { - @SuppressWarnings("unchecked") - AbstractToken token = (AbstractToken)tokenOrBranch; - updateElementOffsetAdd(token); + List> addedTokensOrEmbeddings = change.addedTokenOrEmbeddings(); + if (addedTokensOrEmbeddings != null && addedTokensOrEmbeddings.size() > 0) { + for (TokenOrEmbedding tokenOrEmbedding : addedTokensOrEmbeddings) { + updateElementOffsetAdd(tokenOrEmbedding.token()); } - addAll(index, addedTokensOrBranches); + addAll(index, addedTokensOrEmbeddings); laState = laState.addAll(index, change.laState()); change.syncAddedTokenCount(); // Check for bounds change only - if (removeTokenCount == 1 && addedTokensOrBranches.size() == 1) { + if (removeTokenCount == 1 && addedTokensOrEmbeddings.size() == 1) { // Compare removed and added token ids and part types - AbstractToken removedToken = LexerUtilsConstants.token(removedTokensOrEmbeddingContainers[0]); AbstractToken addedToken = change.addedToken(0); - if (removedToken.id() == addedToken.id() - && removedToken.partType() == addedToken.partType() + if (firstRemovedToken.id() == addedToken.id() + && firstRemovedToken.partType() == addedToken.partType() ) { change.markBoundsChange(); } } + } + + // Possibly restart unfinished lexing + if (this.lexerInputOperation != null) { // Lexing was not finished before update + int tokenCount = tokenCountCurrent(); + lexerInputOperation = createLexerInputOperation(tokenCount, elementOrEndOffset(tokenCount), + (tokenCount > 0) ? state(tokenCount - 1) : null); } } @@ -356,16 +356,21 @@ return null; } + @Override public int startOffset() { return 0; } public int endOffset() { - return text.length(); + return inputSourceText.length(); } public boolean isRemoved() { return false; // Should never become removed + } + + public void setInputSourceText(CharSequence text) { + this.inputSourceText = text; } @Override @@ -373,12 +378,4 @@ return LexerUtilsConstants.appendTokenList(null, this).toString(); } - public CharSequence text() { - return text; - } - - public void setText(CharSequence text) { - this.text = text; - } - } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/JoinTokenListChange.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/inc/JoinTokenListChange.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,282 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. 
+ * accompanied this code. If applicable, add the following below the
+ * License Header, with the fields enclosed by brackets [] replaced by
+ * your own identifying information:
+ * "Portions Copyrighted [year] [name of copyright owner]"
+ *
+ * Contributor(s):
+ *
+ * The Original Software is NetBeans. The Initial Developer of the Original
+ * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
+ * Microsystems, Inc. All Rights Reserved.
+ *
+ * If you wish your version of this file to be governed by only the CDDL
+ * or only the GPL Version 2, indicate your decision by adding
+ * "[Contributor] elects to include this software in this distribution
+ * under the [CDDL or GPL Version 2] license." If you do not indicate a
+ * single choice of license, a recipient has the option to distribute
+ * your version of this file under either the CDDL, the GPL Version 2 or
+ * to extend the choice of license to its licensees as provided above.
+ * However, if you add GPL Version 2 code and therefore, elected the GPL
+ * Version 2 license, then the option applies only if the new code is
+ * made subject to such option by the copyright holder.
+ */
+
+package org.netbeans.lib.lexer.inc;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.netbeans.api.lexer.TokenId;
+import org.netbeans.lib.lexer.EmbeddedJoinInfo;
+import org.netbeans.lib.lexer.EmbeddedTokenList;
+import org.netbeans.lib.lexer.JoinLexerInputOperation;
+import org.netbeans.lib.lexer.JoinTokenListBase;
+import org.netbeans.lib.lexer.token.AbstractToken;
+import org.netbeans.lib.lexer.token.JoinToken;
+import org.netbeans.lib.lexer.token.PartToken;
+
+/**
+ * Token list change for join token lists.
+ *
+ * @author Miloslav Metelka
+ */
+final class JoinTokenListChange<T extends TokenId> extends TokenListChange<T> {
+
+    /** ETL where the character modification occurred. */
+    EmbeddedTokenList<T> charModTokenList;
+
+    private TokenListListUpdate<T> tokenListListUpdate;
+
+    private int startRelexTokenListIndex;
+
+    private List<RelexTokenListChange<T>> relexChanges;
+
+    private JoinLexerInputOperation<T> joinLexerInputOperation;
+
+    public JoinTokenListChange(MutableJoinTokenList<T> tokenList) {
+        super(tokenList);
+    }
+
+    public List<RelexTokenListChange<T>> relexChanges() {
+        return relexChanges;
+    }
+
+    public TokenListListUpdate<T> tokenListListUpdate() {
+        return tokenListListUpdate;
+    }
+
+    public void setTokenListListUpdate(TokenListListUpdate<T> tokenListListUpdate) {
+        this.tokenListListUpdate = tokenListListUpdate;
+    }
+
+    public void setStartInfo(JoinLexerInputOperation<T> joinLexerInputOperation, int localIndex) {
+        this.joinLexerInputOperation = joinLexerInputOperation;
+        this.startRelexTokenListIndex = joinLexerInputOperation.activeTokenListIndex();
+        this.relexChanges = new ArrayList<RelexTokenListChange<T>>(
+                tokenListListUpdate.addedTokenLists.size() + 3);
+        // Add the first change now to incorporate the starting modified token index
+        RelexTokenListChange<T> firstChange = new RelexTokenListChange<T>(
+                joinLexerInputOperation.tokenList(startRelexTokenListIndex));
+        // Set the index in the ETL to properly do replaceTokens() in the ETL.
+        // Setting both index and offset is BTW necessary in order to properly move the offset gap in the ETL.
+        firstChange.setIndex(localIndex);
+        int relexOffset = joinLexerInputOperation.lastTokenEndOffset();
+        firstChange.setOffset(relexOffset);
+        firstChange.setMatchOffset(relexOffset); // Due to removeLastAddedToken() etc.
+        relexChanges.add(firstChange);
+    }
+
+    @Override
+    public void addToken(AbstractToken<T> token, int lookahead, Object state) {
+        // Check if the lexer-input-operation advanced to a next list and possibly add corresponding relex change(s)
+        int activeTokenListIndex = joinLexerInputOperation.activeTokenListIndex();
+        while (startRelexTokenListIndex + relexChanges.size() <= activeTokenListIndex) {
+            // Use JLIO.tokenList() since it already contains the removed/added ETLs.
+            EmbeddedTokenList<T> etl = joinLexerInputOperation.tokenList(
+                    startRelexTokenListIndex + relexChanges.size());
+            RelexTokenListChange<T> relexChange = new RelexTokenListChange<T>(etl);
+            int startOffset = etl.startOffset();
+            relexChange.setOffset(startOffset);
+            relexChanges.add(relexChange);
+        }
+        int relexChangeIndex = activeTokenListIndex - startRelexTokenListIndex;
+        if (token.getClass() == JoinToken.class) {
+            JoinToken<T> joinToken = (JoinToken<T>) token;
+            List<PartToken<T>> joinedParts = joinToken.joinedParts();
+            int extraTokenListSpanCount = joinToken.extraTokenListSpanCount();
+            int startRelexChangeIndex = relexChangeIndex - extraTokenListSpanCount;
+            int joinedPartIndex = 0;
+            // Only add without the last part (it will be added normally outside the loop).
+            // The last ETL cannot be empty (it must contain the last non-empty token part).
+            for (int i = 0; i < extraTokenListSpanCount; i++) {
+                RelexTokenListChange<T> relexChange = relexChanges.get(startRelexChangeIndex + i);
+                // Check whether the token list is non-empty by checking the text length that it covers.
+                // Do not use etl.tokenCount() since the tokens are just being added into the ETL.
+                EmbeddedTokenList<T> etl = (EmbeddedTokenList<T>) relexChange.tokenList();
+                if (etl.textLength() > 0) {
+                    PartToken<T> partToken = joinedParts.get(joinedPartIndex++);
+                    relexChange.addToken(partToken, 0, null);
+                }
+                relexChange.joinTokenLastPartShift = extraTokenListSpanCount - i;
+            }
+            // The last part will be added normally by the subsequent code
+            token = joinedParts.get(joinedPartIndex); // Should be (joinedParts.size()-1)
+        }
+        RelexTokenListChange<T> relexChange = relexChanges.get(relexChangeIndex);
+        relexChange.addToken(token, lookahead, state);
+    }
+
+    @Override
+    public AbstractToken<T> removeLastAddedToken() {
+        RelexTokenListChange<T> lastRelexChange;
+        AbstractToken<T> lastAddedToken = null;
+        do {
+            lastRelexChange = relexChanges.get(relexChanges.size() - 1);
+            int tokenCount = lastRelexChange.tokenList().tokenCountCurrent();
+            if (lastRelexChange.tokenList().tokenCountCurrent() > 0) { // There might be empty ETLs
+                lastAddedToken = lastRelexChange.removeLastAddedToken();
+                tokenCount--;
+            }
+            if (tokenCount == 0) {
+                relexChanges.remove(relexChanges.size() - 1);
+            }
+        } while (lastAddedToken.getClass() != PartToken.class ||
+                ((PartToken<T>) lastAddedToken).joinToken().joinedParts().get(0) != lastAddedToken);
+        matchIndex--;
+        matchOffset = lastRelexChange.matchOffset;
+        // If lastAddedToken is a PartToken then its joinToken() should be returned
+        // but the return value should currently be ignored
+        return lastAddedToken;
+    }
+
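addToken() above splits a JoinToken into its PartTokens and distributes them into the per-ETL relex changes, and removeLastAddedToken() walks the same structure backwards until it reaches a join token's first part. The following sketch shows just the containment relationship that this code navigates (hypothetical names; the real JoinToken/PartToken classes live in org.netbeans.lib.lexer.token and carry much more state):

import java.util.Arrays;
import java.util.List;

// A "join token" logically spans several embedded sections; each physical
// piece is a part token stored inside one embedded token list (ETL).
final class JoinTokenSketch {
    static final class PartSketch {
        final String text;              // the physical text inside one ETL
        PartSketch(String text) { this.text = text; }
    }

    final List<PartSketch> joinedParts; // one part per spanned ETL

    JoinTokenSketch(PartSketch... parts) { this.joinedParts = Arrays.asList(parts); }

    // The join token's length is the sum of its parts' lengths.
    int length() {
        int len = 0;
        for (PartSketch p : joinedParts) {
            len += p.text.length();
        }
        return len;
    }

    // Simplified: number of ETL boundaries crossed after the first part
    // (the real count also accounts for fully spanned empty sections).
    int extraTokenListSpanCount() {
        return joinedParts.size() - 1;
    }

    public static void main(String[] args) {
        // E.g. a comment interrupted by one non-joined embedded section
        JoinTokenSketch t = new JoinTokenSketch(new PartSketch("/* ab"), new PartSketch("cd */"));
        System.out.println(t.length() + " " + t.extraTokenListSpanCount()); // prints "10 1"
    }
}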
+    public void replaceTokens(int diffLength) {
+        // Determine position of matchIndex in token lists:
+        // if matchIndex == jtl.tokenCount() the token list index will be the last list
+        // and endLocalIndex will be its tokenCount(). Because of this
+        // there must be a check whether the token list index is not among removed ETLs.
+        MutableJoinTokenList<T> jtl = (MutableJoinTokenList<T>) tokenList();
+        int endLocalIndex = jtl.tokenStartLocalIndex(matchIndex);
+        int matchTokenListIndex = jtl.activeTokenListIndex();
+        if (matchTokenListIndex >= tokenListListUpdate.modTokenListIndex + tokenListListUpdate.removedTokenListCount) {
+            assert (matchIndex == jtl.tokenCountCurrent()); // Should only happen in this situation
+            // Project into relexChanges
+            matchTokenListIndex += tokenListListUpdate.tokenListCountDiff();
+            relexChanges.get(matchTokenListIndex - startRelexTokenListIndex).setMatchIndex(endLocalIndex);
+            int afterAddIndex = tokenListListUpdate.modTokenListIndex + tokenListListUpdate.addedTokenLists.size();
+            while (--matchTokenListIndex >= afterAddIndex) {
+                TokenListChange<T> change = relexChanges.get(matchTokenListIndex - startRelexTokenListIndex);
+                change.setMatchIndex(change.tokenList().tokenCountCurrent());
+            }
+        }
+        // Fill in the below-mod-ETLs area
+        int index = tokenListListUpdate.modTokenListIndex;
+        while (--index >= startRelexTokenListIndex) {
+            TokenListChange<T> change = relexChanges.get(index - startRelexTokenListIndex);
+            change.setMatchIndex(change.tokenList().tokenCountCurrent());
+        }
+
+        // Physically replace the token lists
+        JoinTokenListBase base = jtl.base();
+        if (tokenListListUpdate.isTokenListsMod()) {
+            // Move gap after last ETL that was relexed (obsolete ETLs still not removed)
+            int relexEndIndex = startRelexTokenListIndex + relexChanges.size();
+            int relexEndOldIndex = relexEndIndex - tokenListListUpdate.tokenListCountDiff();
+            base.moveIndexGap(jtl.tokenListList(), jtl.tokenListStartIndex(), relexEndOldIndex);
+            // Do physical ETLs replace
+            jtl.tokenListList().replace(jtl.tokenListStartIndex() + tokenListListUpdate.modTokenListIndex,
+                    tokenListListUpdate.removedTokenListCount, tokenListListUpdate.addedTokenLists);
+            base.tokenListModNotify(tokenListListUpdate.tokenListCountDiff());
+        }
+
+        // Remember join token count right before the first relexed ETL
+        int joinTokenIndex;
+        if (startRelexTokenListIndex > 0) {
+            EmbeddedTokenList<T> etl = jtl.tokenList(startRelexTokenListIndex - 1);
+            joinTokenIndex = etl.joinInfo.joinTokenIndex() + etl.joinTokenCount(); // Physical removal already performed
+        } else {
+            joinTokenIndex = 0;
+        }
+        // Now process each relex change and update join token count etc.
+        int relexChangesSizeM1 = relexChanges.size() - 1;
+        int i;
+        for (i = 0; i <= relexChangesSizeM1; i++) {
+            RelexTokenListChange<T> change = relexChanges.get(i);
+            EmbeddedTokenList<T> etl = (EmbeddedTokenList<T>) change.tokenList();
+            if (etl.joinInfo == null) {
+                etl.joinInfo = new EmbeddedJoinInfo(base, joinTokenIndex, startRelexTokenListIndex + i);
+            } else {
+                etl.joinInfo.setRawJoinTokenIndex(joinTokenIndex);
+            }
+            // Set new joinTokenLastPartShift before calling etl.joinTokenCount()
+            etl.joinInfo.setJoinTokenLastPartShift(change.joinTokenLastPartShift);
+            // Replace tokens in the individual ETL
+            int realDiffLength = (etl == charModTokenList) ? diffLength : 0;
+            etl.replaceTokens(change, realDiffLength);
+            // Fix join token count
+            joinTokenIndex += etl.joinTokenCount();
+        }
+
+        // Now fix the total join token count
+        int origJoinTokenIndex = (i < jtl.tokenListCount())
+                ? jtl.tokenList(i).joinInfo.joinTokenIndex()
+                : base.joinTokenCount();
+        int joinTokenCountDiff = joinTokenIndex - origJoinTokenIndex;
+        base.updateJoinTokenCount(joinTokenCountDiff);
+
+        if (relexChangesSizeM1 == 0) { // Only changed single ETL
+            if (relexChanges.get(0).isBoundsChange()) {
+                markBoundsChange(); // Joined change treated as bounds change too
+            }
+        }
+    }
+
+    @Override
+    public String toString() {
+        return super.toString() + ", tokenListListUpdate=" + tokenListListUpdate + // NOI18N
+                ", startRelexTokenListIndex=" + startRelexTokenListIndex + // NOI18N
+                ", relexChanges.size()=" + relexChanges.size();
+    }
+
+    @Override
+    public String toStringMods(int indent) {
+        StringBuilder sb = new StringBuilder(100);
+        for (RelexTokenListChange<T> change : relexChanges) {
+            sb.append(change.toStringMods(indent));
+            sb.append('\n');
+        }
+        return sb.toString();
+    }
+
+    static final class RelexTokenListChange<T extends TokenId> extends TokenListChange<T> {
+
+        int joinTokenLastPartShift; // New value for EmbeddedJoinInfo.joinTokenLastPartShift during relex
+
+        RelexTokenListChange(EmbeddedTokenList<T> tokenList) {
+            super(tokenList);
+        }
+
+        @Override
+        public String toString() {
+            return super.toString() + ", lps=" + joinTokenLastPartShift;
+        }
+
+    }
+
+}
\ No newline at end of file
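replaceTokens() above repeatedly converts between the joined index space and per-ETL local indices (tokenStartLocalIndex(), activeTokenListIndex()). A stripped-down model of that mapping, assuming plain cumulative counts (the real JoinTokenList additionally keeps an index gap and cached join info so the lookup is not a linear walk):

// Illustrative only: resolving a global ("join") token index to a
// (sectionIndex, localIndex) pair over per-section token counts.
final class JoinIndexSketch {
    private final int[] sectionTokenCounts; // token count per embedded section

    JoinIndexSketch(int... sectionTokenCounts) {
        this.sectionTokenCounts = sectionTokenCounts;
    }

    // Returns {sectionIndex, localIndex} for the given global index.
    int[] resolve(int globalIndex) {
        int section = 0;
        while (globalIndex >= sectionTokenCounts[section]) {
            globalIndex -= sectionTokenCounts[section++];
        }
        return new int[] { section, globalIndex };
    }

    public static void main(String[] args) {
        JoinIndexSketch jtl = new JoinIndexSketch(3, 0, 5); // middle section empty
        int[] pos = jtl.resolve(4); // skips 3 tokens of section 0 and empty section 1
        System.out.println(pos[0] + ":" + pos[1]); // prints "2:1"
    }
}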
diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/MutableJoinLexerInputOperation.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lexer/src/org/netbeans/lib/lexer/inc/MutableJoinLexerInputOperation.java	Wed May 28 14:48:55 2008 +0200
@@ -0,0 +1,93 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
+ *
+ * The contents of this file are subject to the terms of either the GNU
+ * General Public License Version 2 only ("GPL") or the Common
+ * Development and Distribution License("CDDL") (collectively, the
+ * "License"). You may not use this file except in compliance with the
+ * License. You can obtain a copy of the License at
+ * http://www.netbeans.org/cddl-gplv2.html
+ * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
+ * specific language governing permissions and limitations under the
+ * License. When distributing the software, include this License Header
+ * Notice in each file and include the License file at
+ * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the GPL Version 2 section of the License file that
+ * accompanied this code. If applicable, add the following below the
+ * License Header, with the fields enclosed by brackets [] replaced by
+ * your own identifying information:
+ * "Portions Copyrighted [year] [name of copyright owner]"
+ *
+ * Contributor(s):
+ *
+ * The Original Software is NetBeans. The Initial Developer of the Original
+ * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
+ * Microsystems, Inc. All Rights Reserved.
+ *
+ * If you wish your version of this file to be governed by only the CDDL
+ * or only the GPL Version 2, indicate your decision by adding
+ * "[Contributor] elects to include this software in this distribution
+ * under the [CDDL or GPL Version 2] license." If you do not indicate a
+ * single choice of license, a recipient has the option to distribute
+ * your version of this file under either the CDDL, the GPL Version 2 or
+ * to extend the choice of license to its licensees as provided above.
+ * However, if you add GPL Version 2 code and therefore, elected the GPL
+ * Version 2 license, then the option applies only if the new code is
+ * made subject to such option by the copyright holder.
+ */
+
+package org.netbeans.lib.lexer.inc;
+
+import org.netbeans.api.lexer.TokenId;
+import org.netbeans.lib.lexer.EmbeddedTokenList;
+import org.netbeans.lib.lexer.JoinLexerInputOperation;
+import org.netbeans.lib.lexer.JoinTokenList;
+
+/**
+ * Lexer input operation over multiple joined sections (embedded token lists).
+ * <br>
+ * It produces regular tokens (to be added directly into the ETL represented by
+ * {@link #activeTokenList()}) and also special {@link org.netbeans.lib.lexer.token.JoinToken}
+ * instances in case a token spans the boundaries of multiple ETLs.
+ * <br>
+ * It can either work over a JoinTokenList directly or, during a modification,
+ * it can simulate that certain token lists have already been removed from or added to
+ * the underlying token list.
+ * <br>
+ *
+ * {@link #recognizedTokenLastInTokenList()} tells whether the last produced
+ * token ends exactly at the boundary of the activeTokenList.
+ *
+ * @author Miloslav Metelka
+ * @version 1.00
+ */
+
+class MutableJoinLexerInputOperation<T extends TokenId> extends JoinLexerInputOperation<T> {
+
+    private TokenListListUpdate<T> tokenListListUpdate;
+
+    MutableJoinLexerInputOperation(JoinTokenList<T> joinTokenList, int relexJoinIndex, Object lexerRestartState,
+            int activeTokenListIndex, int relexOffset, TokenListListUpdate<T> tokenListListUpdate
+    ) {
+        super(joinTokenList, relexJoinIndex, lexerRestartState, activeTokenListIndex, relexOffset);
+        this.tokenListListUpdate = tokenListListUpdate;
+    }
+
+    @Override
+    public EmbeddedTokenList<T> tokenList(int tokenListIndex) {
+        return tokenListListUpdate.afterUpdateTokenList((JoinTokenList<T>) tokenList, tokenListIndex);
+    }
+
+    @Override
+    protected int tokenListCount() {
+        return tokenListListUpdate.afterUpdateTokenListCount((JoinTokenList<T>) tokenList);
+    }
+
+    @Override
+    public String toString() {
+        return super.toString() + ", tokenListListUpdate: " + tokenListListUpdate; // NOI18N
+    }
+
+}
diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/MutableJoinTokenList.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lexer/src/org/netbeans/lib/lexer/inc/MutableJoinTokenList.java	Wed May 28 14:48:55 2008 +0200
@@ -0,0 +1,96 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
+ *
+ * The contents of this file are subject to the terms of either the GNU
+ * General Public License Version 2 only ("GPL") or the Common
+ * Development and Distribution License("CDDL") (collectively, the
+ * "License"). You may not use this file except in compliance with the
+ * License. You can obtain a copy of the License at
+ * http://www.netbeans.org/cddl-gplv2.html
+ * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
+ * specific language governing permissions and limitations under the
+ * License. When distributing the software, include this License Header
+ * Notice in each file and include the License file at
+ * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the GPL Version 2 section of the License file that
+ * accompanied this code. If applicable, add the following below the
+ * License Header, with the fields enclosed by brackets [] replaced by
+ * your own identifying information:
+ * "Portions Copyrighted [year] [name of copyright owner]"
+ *
+ * Contributor(s):
+ *
+ * The Original Software is NetBeans. The Initial Developer of the Original
+ * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
+ * Microsystems, Inc. All Rights Reserved.
+ *
+ * If you wish your version of this file to be governed by only the CDDL
+ * or only the GPL Version 2, indicate your decision by adding
+ * "[Contributor] elects to include this software in this distribution
+ * under the [CDDL or GPL Version 2] license." If you do not indicate a
+ * single choice of license, a recipient has the option to distribute
+ * your version of this file under either the CDDL, the GPL Version 2 or
+ * to extend the choice of license to its licensees as provided above.
+ * However, if you add GPL Version 2 code and therefore, elected the GPL
+ * Version 2 license, then the option applies only if the new code is
+ * made subject to such option by the copyright holder.
+ */
+
+package org.netbeans.lib.lexer.inc;
+
+import org.netbeans.api.lexer.TokenId;
+import org.netbeans.lib.lexer.EmbeddedTokenList;
+import org.netbeans.lib.lexer.JoinTokenList;
+import org.netbeans.lib.lexer.JoinTokenListBase;
+import org.netbeans.lib.lexer.LexerInputOperation;
+import org.netbeans.lib.lexer.TokenListList;
+import org.netbeans.lib.lexer.TokenOrEmbedding;
+
+
+/**
+ * Mutable join token list allows mutations by token list updater.
+ *
+ * @author Miloslav Metelka
+ */
+
+class MutableJoinTokenList<T extends TokenId> extends JoinTokenList<T> implements MutableTokenList<T> {
+
+    static <T extends TokenId> MutableJoinTokenList<T> create(TokenListList<T> tokenListList, int etlIndex, EmbeddedTokenList<T> etl) {
+        int tokenListStartIndex = etlIndex - etl.joinInfo.tokenListIndex();
+        MutableJoinTokenList<T> jtl = new MutableJoinTokenList<T>(tokenListList, etl.joinInfo.base, tokenListStartIndex);
+        // Position to this etl's join index
+        jtl.setActiveTokenListIndex(etlIndex - tokenListStartIndex);
+        return jtl;
+    }
+
+    MutableJoinTokenList(TokenListList<T> tokenListList, JoinTokenListBase base, int tokenListStartIndex) {
+        super(tokenListList, base, tokenListStartIndex);
+    }
+
+    public TokenOrEmbedding<T> tokenOrEmbeddingUnsync(int index) {
+        return tokenOrEmbedding(index);
+    }
+
+    public boolean isFullyLexed() {
+        return true;
+    }
+
+    public void replaceTokens(TokenListChange<T> change, int diffLength) {
+        ((JoinTokenListChange<T>) change).replaceTokens(diffLength);
+    }
+
+    public LexerInputOperation<T> createLexerInputOperation(int tokenIndex, int relexOffset, Object relexState) {
+        // Should never be called
+        throw new IllegalStateException("Should never be called"); // NOI18N
+    }
+
+    public void resetActiveAfterUpdate() { // Update the active token list after updating
+        activeTokenListIndex = 0;
+        fetchActiveTokenListData();
+    }
+
+}
+
diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/MutableTokenList.java
--- a/lexer/src/org/netbeans/lib/lexer/inc/MutableTokenList.java	Wed May 28 13:50:31 2008 +0200
+++ b/lexer/src/org/netbeans/lib/lexer/inc/MutableTokenList.java	Wed May 28 14:48:55 2008 +0200
@@ -44,6 +44,7 @@
 import org.netbeans.api.lexer.TokenId;
 import org.netbeans.lib.lexer.LexerInputOperation;
 import org.netbeans.lib.lexer.TokenList;
+import org.netbeans.lib.lexer.TokenOrEmbedding;
 
 /**
  * Token list that allows mutating by token list mutator.
@@ -61,8 +62,8 @@
      * Also do not perform any checks regarding index validity
      * - only items below {@link #tokenCountCurrent()} will be requested.
      */
-    Object tokenOrEmbeddingContainerUnsync(int index);
-
+    TokenOrEmbedding<T> tokenOrEmbeddingUnsync(int index);
+
     /**
     * Create lexer input operation used for relexing of the input.
     */
@@ -80,6 +81,6 @@
     /**
     * Update the token list by replacing tokens according to the given change.
     */
-    void replaceTokens(TokenListChange<T> change, int removeTokenCount, int diffLength);
+    void replaceTokens(TokenListChange<T> change, int diffLength);
 
 }
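The MutableTokenList change above (Object tokenOrEmbeddingContainerUnsync -> TokenOrEmbedding tokenOrEmbeddingUnsync) replaces an untyped token-or-embedding union with a typed one. The pattern can be sketched as follows (hypothetical names; in the real module the token and embedding-container classes implement TokenOrEmbedding themselves):

// Both variants expose the token and the (possibly null) embedding without
// instanceof checks or casts from Object.
interface TokenOrEmbeddingSketch {

    TokenSketch token();          // never null
    EmbeddingSketch embedding();  // null when the element is a plain token

    final class TokenSketch implements TokenOrEmbeddingSketch {
        public TokenSketch token() { return this; }
        public EmbeddingSketch embedding() { return null; }
    }

    final class EmbeddingSketch implements TokenOrEmbeddingSketch {
        private final TokenSketch wrappedToken;
        EmbeddingSketch(TokenSketch wrappedToken) { this.wrappedToken = wrappedToken; }
        public TokenSketch token() { return wrappedToken; }
        public EmbeddingSketch embedding() { return this; }
    }
}

This is why code elsewhere in the patch can call tokenOrEmbedding.token() unconditionally and only null-check tokenOrEmbedding.embedding().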
diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/RemovedTokenList.java
--- a/lexer/src/org/netbeans/lib/lexer/inc/RemovedTokenList.java	Wed May 28 13:50:31 2008 +0200
+++ b/lexer/src/org/netbeans/lib/lexer/inc/RemovedTokenList.java	Wed May 28 14:48:55 2008 +0200
@@ -52,6 +52,7 @@
 import org.netbeans.lib.lexer.TokenList;
 import org.netbeans.lib.lexer.token.AbstractToken;
 import org.netbeans.lib.lexer.token.TextToken;
+import org.netbeans.lib.lexer.TokenOrEmbedding;
 
 /**
  * Token list implementation holding added or removed tokens from a list.
@@ -64,21 +65,21 @@
 
     private final LanguagePath languagePath;
 
-    private Object[] tokensOrBranches;
+    private TokenOrEmbedding<T>[] tokenOrEmbeddings;
 
     private int removedTokensStartOffset;
 
-    public RemovedTokenList(LanguagePath languagePath, Object[] tokensOrBranches) {
+    public RemovedTokenList(LanguagePath languagePath, TokenOrEmbedding<T>[] tokensOrBranches) {
         this.languagePath = languagePath;
-        this.tokensOrBranches = tokensOrBranches;
+        this.tokenOrEmbeddings = tokensOrBranches;
     }
 
     public LanguagePath languagePath() {
         return languagePath;
     }
 
-    public Object tokenOrEmbeddingContainer(int index) {
-        return (index < tokensOrBranches.length) ? tokensOrBranches[index] : null;
+    public TokenOrEmbedding<T> tokenOrEmbedding(int index) {
+        return (index < tokenOrEmbeddings.length) ? tokenOrEmbeddings[index] : null;
     }
 
     public int lookahead(int index) {
@@ -89,7 +90,7 @@
         return null;
     }
 
-    public int tokenOffset(int index) {
+    public int tokenOffsetByIndex(int index) {
         Token<T> token = existingToken(index);
         if (token.isFlyweight()) {
             int offset = 0;
@@ -109,14 +110,18 @@
         }
     }
 
+    public int[] tokenIndex(int offset) {
+        return LexerUtilsConstants.tokenIndexBinSearch(this, offset, tokenCountCurrent());
+    }
+
     private Token<T> existingToken(int index) {
-        return LexerUtilsConstants.token(tokensOrBranches[index]);
+        return tokenOrEmbeddings[index].token();
     }
 
     public synchronized AbstractToken<T> replaceFlyToken(
             int index, AbstractToken<T> flyToken, int offset) {
         TextToken<T> nonFlyToken = ((TextToken<T>)flyToken).createCopy(this, offset);
-        tokensOrBranches[index] = nonFlyToken;
+        tokenOrEmbeddings[index] = nonFlyToken;
         return nonFlyToken;
     }
 
@@ -125,19 +130,20 @@
     }
 
     public int tokenCountCurrent() {
-        return tokensOrBranches.length;
+        return tokenOrEmbeddings.length;
     }
 
     public int modCount() {
-        return -1;
+        return LexerUtilsConstants.MOD_COUNT_IMMUTABLE_INPUT;
     }
 
-    public int childTokenOffset(int rawOffset) {
+    public int tokenOffset(AbstractToken<T> token) {
+        int rawOffset = token.rawOffset();
         // Offsets of contained tokens are absolute
         return rawOffset;
     }
 
-    public char childTokenCharAt(int rawOffset, int index) {
+    public char charAt(int offset) {
         throw new IllegalStateException("Querying of text for removed tokens not supported"); // NOI18N
     }
 
@@ -145,10 +151,14 @@
         throw new IllegalStateException("Branching of removed tokens not supported"); // NOI18N
     }
 
-    public TokenList root() {
-        return this;
+    public TokenList rootTokenList() {
+        return null;
     }
-
+
+    public CharSequence inputSourceText() {
+        return null;
+    }
+
     public TokenHierarchyOperation tokenHierarchyOperation() {
         return null;
     }
@@ -159,14 +169,14 @@
 
     public int startOffset() {
         if (tokenCountCurrent() > 0 || tokenCount() > 0)
-            return tokenOffset(0);
+            return tokenOffsetByIndex(0);
         return 0;
     }
 
     public int endOffset() {
         int cntM1 = tokenCount() - 1;
         if (cntM1 >= 0)
-            return tokenOffset(cntM1) + LexerUtilsConstants.token(this, cntM1).length();
+            return tokenOffsetByIndex(cntM1) + tokenOrEmbedding(cntM1).token().length();
         return 0;
     }
 
diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/SnapshotTokenList.java
--- a/lexer/src/org/netbeans/lib/lexer/inc/SnapshotTokenList.java	Wed May 28 13:50:31 2008 +0200
+++ b/lexer/src/org/netbeans/lib/lexer/inc/SnapshotTokenList.java	Wed May 28 14:48:55 2008 +0200
@@ -44,7 +44,6 @@
 import java.util.Set;
 import org.netbeans.api.lexer.InputAttributes;
 import org.netbeans.api.lexer.LanguagePath;
-import org.netbeans.api.lexer.Token;
 import org.netbeans.api.lexer.TokenId;
 import org.netbeans.lib.editor.util.CompactMap;
 import
org.netbeans.lib.lexer.EmbeddedTokenList; @@ -52,6 +51,7 @@ import org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.lib.lexer.TokenHierarchyOperation; import org.netbeans.lib.lexer.TokenList; +import org.netbeans.lib.lexer.TokenOrEmbedding; import org.netbeans.lib.lexer.token.AbstractToken; import org.netbeans.lib.lexer.token.TextToken; @@ -79,7 +79,7 @@ private int liveTokenOffsetDiff; /** Captured original tokens or branches. */ - private Object[] origTokensOrBranches; + private TokenOrEmbedding[] origTokenOrEmbeddings; /** Original token's offsets. The array is occupied * and maintained in the same way like origTokensOrBranches. @@ -118,15 +118,15 @@ return liveTokenList.languagePath(); } - public Object tokenOrEmbeddingContainer(int index) { + public TokenOrEmbedding tokenOrEmbedding(int index) { if (liveTokenGapStart == -1 || index < liveTokenGapStart) { - return liveTokenList.tokenOrEmbeddingContainer(index); + return liveTokenList.tokenOrEmbedding(index); } index -= liveTokenGapStart; if (index < origTokenCount) { - return origTokensOrBranches[origTokenStartIndex + index]; + return origTokenOrEmbeddings[origTokenStartIndex + index]; } - return liveTokenList.tokenOrEmbeddingContainer(liveTokenGapEnd + index - origTokenCount); + return liveTokenList.tokenOrEmbedding(liveTokenGapEnd + index - origTokenCount); } public int lookahead(int index) { @@ -141,9 +141,9 @@ return null; } - public int tokenOffset(int index) { + public int tokenOffsetByIndex(int index) { if (liveTokenGapStart == -1 || index < liveTokenGapStart) { - return liveTokenList.tokenOffset(index); + return liveTokenList.tokenOffsetByIndex(index); } index -= liveTokenGapStart; if (index < origTokenCount) { @@ -151,32 +151,34 @@ } index -= origTokenCount; - AbstractToken token = LexerUtilsConstants.token(liveTokenList. - tokenOrEmbeddingContainerUnsync(liveTokenGapEnd + index)); + AbstractToken token = liveTokenList.tokenOrEmbeddingUnsync(liveTokenGapEnd + index).token(); int offset; if (token.isFlyweight()) { offset = token.length(); while (--index >= 0) { - token = LexerUtilsConstants.token(liveTokenList. - tokenOrEmbeddingContainerUnsync(liveTokenGapEnd + index)); + token = liveTokenList.tokenOrEmbeddingUnsync(liveTokenGapEnd + index).token(); if (token.isFlyweight()) { offset += token.length(); } else { // non-flyweight element - offset += tokenOffset(token, liveTokenList, token.rawOffset()); + offset += tokenOffset(token, liveTokenList); break; } } if (index == -1) { // below the boundary of above-gap live tokens index += liveTokenGapStart + origTokenCount; if (index >= 0) { - offset += tokenOffset(index); + offset += tokenOffsetByIndex(index); } } } else { // non-flyweight - offset = tokenOffset(token, liveTokenList, token.rawOffset()); + offset = tokenOffset(token, liveTokenList); } return offset; + } + + public int[] tokenIndex(int offset) { + return LexerUtilsConstants.tokenIndexLazyTokenCreation(this, offset); } /** @@ -186,7 +188,7 @@ * @return offset for the particular token. */ public int tokenOffset( - AbstractToken token, TokenList tokenList, int rawOffset) { + AbstractToken token, TokenList tokenList) { // The following situations can happen: // 1. Token instance is contained in token2offset map so the token's // offset is overriden by the information in the map. @@ -205,13 +207,13 @@ // needs to be corrected if necessary. 
if (tokenList.getClass() == EmbeddedTokenList.class) { EmbeddedTokenList etl = (EmbeddedTokenList)tokenList; - AbstractToken rootBranchToken = etl.rootToken(); + AbstractToken rootBranchToken = null; // originally etl.rootToken(); Token2OffsetEntry entry = token2offset.get(rootBranchToken); if (entry != null) { - return entry.offset() + etl.childTokenOffsetShift(rawOffset); + return entry.offset();// used to be: + etl.childTokenOffsetShift(rawOffset); } else { // no special entry => check whether the regular offset is below liveTokenGapStartOffset - int offset = etl.childTokenOffset(rawOffset); - TokenList rootTokenList = etl.root(); + int offset = etl.tokenOffset(token); + TokenList rootTokenList = etl.rootTokenList(); if (rootTokenList != null && rootTokenList.getClass() == IncTokenList.class) { if (offset >= liveTokenGapStartOffset) { offset += liveTokenOffsetDiff; @@ -226,14 +228,13 @@ if (entry != null) { return entry.offset(); } else { + int offset = tokenList.tokenOffset(token); if (tokenList.getClass() == IncTokenList.class) { - rawOffset = tokenList.childTokenOffset(rawOffset); - if (rawOffset >= liveTokenGapStartOffset) { - rawOffset += liveTokenOffsetDiff; + if (offset >= liveTokenGapStartOffset) { + offset += liveTokenOffsetDiff; } - return rawOffset; } - return tokenList.childTokenOffset(rawOffset); + return offset; } } } @@ -253,27 +254,28 @@ } public int modCount() { - return -1; + return LexerUtilsConstants.MOD_COUNT_IMMUTABLE_INPUT; } - public int childTokenOffset(int rawOffset) { + public int tokenOffset(AbstractToken token) { + int rawOffset = token.rawOffset(); // Offset of the standalone token is absolute return rawOffset; } - public char childTokenCharAt(int rawOffset, int index) { + public char charAt(int offset) { // No tokens expected to be parented to this token list throw new IllegalStateException("Not expected to be called"); // NOI18N } - public void wrapToken(int index, EmbeddingContainer embeddingContainer) { + public void wrapToken(int index, EmbeddingContainer embeddingContainer) { // Allow branching if (liveTokenGapStart == -1 || index < liveTokenGapStart) { liveTokenList.wrapToken(index, embeddingContainer); } else { index -= liveTokenGapStart; if (index < origTokenCount) { - origTokensOrBranches[origTokenStartIndex + index] = embeddingContainer; + origTokenOrEmbeddings[origTokenStartIndex + index] = embeddingContainer; } else { liveTokenList.wrapToken(liveTokenGapEnd + index - origTokenCount, embeddingContainer); } @@ -288,7 +290,7 @@ index -= liveTokenGapStart; if (index < origTokenCount) { nonFlyToken = ((TextToken)flyToken).createCopy(this, offset); - origTokensOrBranches[origTokenStartIndex + index] = nonFlyToken; + origTokenOrEmbeddings[origTokenStartIndex + index] = nonFlyToken; } else { nonFlyToken = liveTokenList.replaceFlyToken( liveTokenGapEnd + index - origTokenCount, @@ -298,10 +300,14 @@ return nonFlyToken; } - public TokenList root() { + public TokenList rootTokenList() { return this; } - + + public CharSequence inputSourceText() { + return rootTokenList().inputSourceText(); + } + public TokenHierarchyOperation tokenHierarchyOperation() { return snapshot; } @@ -320,14 +326,14 @@ public int startOffset() { if (tokenCountCurrent() > 0 || tokenCount() > 0) - return tokenOffset(0); + return tokenOffsetByIndex(0); return 0; } public int endOffset() { int cntM1 = tokenCount() - 1; if (cntM1 >= 0) - return tokenOffset(cntM1) + LexerUtilsConstants.token(this, cntM1).length(); + return tokenOffsetByIndex(cntM1) + 
tokenOrEmbedding(cntM1).token().length(); return 0; } @@ -350,8 +356,10 @@ liveTokenGapStart = startRemovedIndex; liveTokenGapEnd = startRemovedIndex; liveTokenGapStartOffset = change.offset(); - origTokensOrBranches = new Object[removedTokenList.tokenCount()]; - origOffsets = new int[origTokensOrBranches.length]; + @SuppressWarnings("unchecked") + TokenOrEmbedding[] tokenOrEmbeddings = new TokenOrEmbedding[removedTokenList.tokenCount()]; + origTokenOrEmbeddings = tokenOrEmbeddings; + origOffsets = new int[origTokenOrEmbeddings.length]; } int liveTokenIndexDiff = change.tokenChangeInfo().addedTokenCount() @@ -367,29 +375,29 @@ int offset = change.offset(); liveTokenGapStartOffset = offset; for (index = startRemovedIndex; index < bound; index++) { - Object tokenOrEmbeddingContainer = removedTokenList.tokenOrEmbeddingContainer(index - startRemovedIndex); - AbstractToken token = LexerUtilsConstants.token(tokenOrEmbeddingContainer); + TokenOrEmbedding tokenOrEmbedding = removedTokenList.tokenOrEmbedding(index - startRemovedIndex); + AbstractToken token = tokenOrEmbedding.token(); if (!token.isFlyweight()) { TokenList tokenList = token.tokenList(); if (tokenList == null) { - tokenList = new StandaloneTokenList(change.languagePath(), - eventInfo.originalText().toCharArray(offset, offset + token.length())); + tokenList = null; // new StandaloneTokenList(change.languagePath(), + // eventInfo.originalText().toCharArray(offset, offset + token.length())); token.setTokenList(tokenList); } } origOffsets[origTokenStartIndex] = offset; - origTokensOrBranches[origTokenStartIndex++] = tokenOrEmbeddingContainer; + origTokenOrEmbeddings[origTokenStartIndex++] = tokenOrEmbedding; offset += token.length(); } while (index < liveTokenGapStart) { - Object tokenOrEmbeddingContainer = liveTokenList.tokenOrEmbeddingContainerUnsync(index + liveTokenIndexDiff); - AbstractToken t = LexerUtilsConstants.token(tokenOrEmbeddingContainer); + TokenOrEmbedding tokenOrEmbedding = liveTokenList.tokenOrEmbeddingUnsync(index + liveTokenIndexDiff); + AbstractToken t = tokenOrEmbedding.token(); if (!t.isFlyweight()) { token2offset.putEntry(new Token2OffsetEntry(t, offset)); } origOffsets[origTokenStartIndex] = offset; - origTokensOrBranches[origTokenStartIndex++] = tokenOrEmbeddingContainer; + origTokenOrEmbeddings[origTokenStartIndex++] = tokenOrEmbedding; offset += t.length(); index++; } @@ -406,14 +414,14 @@ int index = endRemovedIndex; int offset = change.removedEndOffset(); for (index = endRemovedIndex - 1; index >= bound; index--) { - Object tokenOrEmbeddingContainer = removedTokenList.tokenOrEmbeddingContainer(index - startRemovedIndex); - AbstractToken token = LexerUtilsConstants.token(tokenOrEmbeddingContainer); + TokenOrEmbedding tokenOrEmbedding = removedTokenList.tokenOrEmbedding(index - startRemovedIndex); + AbstractToken token = tokenOrEmbedding.token(); offset -= token.length(); if (!token.isFlyweight()) { TokenList tokenList = token.tokenList(); if (tokenList == null) { - tokenList = new StandaloneTokenList(change.languagePath(), - eventInfo.originalText().toCharArray(offset, offset + token.length())); + tokenList = null; // new StandaloneTokenList(change.languagePath(), + // eventInfo.originalText().toCharArray(offset, offset + token.length())); token.setTokenList(tokenList); } } @@ -422,19 +430,19 @@ if (liveTokenOffsetDiff != 0) { token2offset.putEntry(new Token2OffsetEntry(token, origOffsets[origTokenIndex])); } - origTokensOrBranches[origTokenIndex--] = tokenOrEmbeddingContainer; + 
origTokenOrEmbeddings[origTokenIndex--] = tokenOrEmbedding; } while (index >= liveTokenGapEnd) { - Object tokenOrEmbeddingContainer = liveTokenList.tokenOrEmbeddingContainerUnsync(index + liveTokenIndexDiff); - AbstractToken token = LexerUtilsConstants.token(tokenOrEmbeddingContainer); + TokenOrEmbedding tokenOrEmbedding = liveTokenList.tokenOrEmbeddingUnsync(index + liveTokenIndexDiff); + AbstractToken token = tokenOrEmbedding.token(); offset -= token.length(); if (!token.isFlyweight()) { token2offset.putEntry(new Token2OffsetEntry(token, offset)); } origOffsets[origTokenIndex] = offset + liveTokenOffsetDiff; token2offset.putEntry(new Token2OffsetEntry(token, origOffsets[origTokenIndex])); - origTokensOrBranches[origTokenIndex--] = tokenOrEmbeddingContainer; + origTokenOrEmbeddings[origTokenIndex--] = tokenOrEmbedding; index--; } liveTokenGapEnd = endRemovedIndex; @@ -445,52 +453,56 @@ } private void ensureOrigTokensStartCapacity(int extraOrigTokenCount) { - if (extraOrigTokenCount > origTokensOrBranches.length - origTokenCount) { // will need to reallocate + if (extraOrigTokenCount > origTokenOrEmbeddings.length - origTokenCount) { // will need to reallocate // Could check for maximum possible token count (origTokenCount + below-and-above live token counts) // but would cause init of live tokens above gap which is undesirable - Object[] newOrigTokensOrBranches = new Object[(origTokensOrBranches.length * 3 / 2) + extraOrigTokenCount]; + @SuppressWarnings("unchecked") + TokenOrEmbedding[] newOrigTokensOrBranches = new TokenOrEmbedding[ + (origTokenOrEmbeddings.length * 3 / 2) + extraOrigTokenCount]; int[] newOrigOffsets = new int[newOrigTokensOrBranches.length]; int newIndex = Math.max(extraOrigTokenCount, (newOrigTokensOrBranches.length - (origTokenCount + extraOrigTokenCount)) / 2); - System.arraycopy(origTokensOrBranches, origTokenStartIndex, + System.arraycopy(origTokenOrEmbeddings, origTokenStartIndex, newOrigTokensOrBranches, newIndex, origTokenCount); System.arraycopy(origOffsets, origTokenStartIndex, newOrigOffsets, newIndex, origTokenCount); - origTokensOrBranches = newOrigTokensOrBranches; + origTokenOrEmbeddings = newOrigTokensOrBranches; origOffsets = newOrigOffsets; origTokenStartIndex = newIndex; } else if (extraOrigTokenCount > origTokenStartIndex) { // only move // Move to the end of the array - int newIndex = origTokensOrBranches.length - origTokenCount; - System.arraycopy(origTokensOrBranches, origTokenStartIndex, - origTokensOrBranches, newIndex, origTokenCount); + int newIndex = origTokenOrEmbeddings.length - origTokenCount; + System.arraycopy(origTokenOrEmbeddings, origTokenStartIndex, + origTokenOrEmbeddings, newIndex, origTokenCount); System.arraycopy(origOffsets, origTokenStartIndex, origOffsets, newIndex, origTokenCount); - origTokenStartIndex = origTokensOrBranches.length - origTokenCount; + origTokenStartIndex = origTokenOrEmbeddings.length - origTokenCount; } } private void ensureOrigTokensEndCapacity(int extraOrigTokenCount) { - if (extraOrigTokenCount > origTokensOrBranches.length - origTokenCount) { // will need to reallocate + if (extraOrigTokenCount > origTokenOrEmbeddings.length - origTokenCount) { // will need to reallocate // Could check for maximum possible token count (origTokenCount + below-and-above live token counts) // but would cause init of live tokens above gap which is undesirable - Object[] newOrigTokensOrBranches = new Object[(origTokensOrBranches.length * 3 / 2) + extraOrigTokenCount]; + @SuppressWarnings("unchecked") + TokenOrEmbedding[] 
newOrigTokensOrBranches = new TokenOrEmbedding[ + (origTokenOrEmbeddings.length * 3 / 2) + extraOrigTokenCount]; int[] newOrigOffsets = new int[newOrigTokensOrBranches.length]; int newIndex = (newOrigTokensOrBranches.length - (origTokenCount + extraOrigTokenCount)) / 2; - System.arraycopy(origTokensOrBranches, origTokenStartIndex, + System.arraycopy(origTokenOrEmbeddings, origTokenStartIndex, newOrigTokensOrBranches, newIndex, origTokenCount); System.arraycopy(origOffsets, origTokenStartIndex, newOrigOffsets, newIndex, origTokenCount); - origTokensOrBranches = newOrigTokensOrBranches; + origTokenOrEmbeddings = newOrigTokensOrBranches; origOffsets = newOrigOffsets; origTokenStartIndex = newIndex; - } else if (extraOrigTokenCount > origTokensOrBranches.length - origTokenCount - origTokenStartIndex) { // only move + } else if (extraOrigTokenCount > origTokenOrEmbeddings.length - origTokenCount - origTokenStartIndex) { // only move // Move to the end of the array - System.arraycopy(origTokensOrBranches, origTokenStartIndex, - origTokensOrBranches, 0, origTokenCount); + System.arraycopy(origTokenOrEmbeddings, origTokenStartIndex, + origTokenOrEmbeddings, 0, origTokenCount); System.arraycopy(origOffsets, origTokenStartIndex, origOffsets, 0, origTokenCount); origTokenStartIndex = 0; diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/StandaloneTokenList.java --- a/lexer/src/org/netbeans/lib/lexer/inc/StandaloneTokenList.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,162 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. 
- * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer.inc; - -import java.util.Set; -import org.netbeans.api.lexer.InputAttributes; -import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.EmbeddingContainer; -import org.netbeans.lib.lexer.LexerUtilsConstants; -import org.netbeans.lib.lexer.TokenHierarchyOperation; -import org.netbeans.lib.lexer.TokenList; -import org.netbeans.lib.lexer.token.AbstractToken; - - -/** - * Single token list maintains a text for a single token. - *
- * It's used for token hierarchy snapshots only. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public final class StandaloneTokenList implements TokenList { - - private char[] tokenText; - - private LanguagePath languagePath; - - public StandaloneTokenList(LanguagePath languagePath, char[] tokenText) { - this.languagePath = languagePath; - this.tokenText = tokenText; - } - - public LanguagePath languagePath() { - return languagePath; - } - - public Object tokenOrEmbeddingContainer(int index) { - throw new IllegalStateException("Not expected to be called"); // NOI18N - } - - public AbstractToken replaceFlyToken( - int index, AbstractToken flyToken, int offset) { - throw new IllegalStateException("Not expected to be called"); // NOI18N - } - - public int lookahead(int index) { - return -1; - } - - public Object state(int index) { - return null; - } - - public int tokenOffset(int index) { - throw new IllegalStateException("Not expected to be called"); // NOI18N - } - - public int tokenCount() { - return 1; - } - - public int tokenCountCurrent() { - return 1; - } - - public int modCount() { - return -1; - } - - public int childTokenOffset(int rawOffset) { - // Offset of the standalone token is absolute - return rawOffset; - } - - public char childTokenCharAt(int rawOffset, int index) { - return tokenText[index]; - } - - public void wrapToken(int index, EmbeddingContainer embeddingContainer) { - throw new IllegalStateException("Branching of standalone tokens not supported"); // NOI18N - } - - public TokenList root() { - return this; - } - - public TokenHierarchyOperation tokenHierarchyOperation() { - return null; - } - - public InputAttributes inputAttributes() { - throw new IllegalStateException("Not expected to be called"); // NOI18N - } - - public boolean isContinuous() { - return true; - } - - public Set skipTokenIds() { - return null; - } - - public int startOffset() { - if (tokenCountCurrent() > 0 || tokenCount() > 0) - return tokenOffset(0); - return 0; - } - - public int endOffset() { - int cntM1 = tokenCount() - 1; - if (cntM1 >= 0) - return tokenOffset(cntM1) + LexerUtilsConstants.token(this, cntM1).length(); - return 0; - } - - public boolean isRemoved() { - return false; // Should be used when part of a snapshot - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/TokenHierarchyEventInfo.java --- a/lexer/src/org/netbeans/lib/lexer/inc/TokenHierarchyEventInfo.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/inc/TokenHierarchyEventInfo.java Wed May 28 14:48:55 2008 +0200 @@ -43,12 +43,9 @@ import org.netbeans.api.lexer.TokenChange; import org.netbeans.api.lexer.TokenHierarchyEventType; -import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.editor.util.CharSequenceUtilities; import org.netbeans.lib.lexer.LexerApiPackageAccessor; -import org.netbeans.lib.lexer.LexerSpiPackageAccessor; import org.netbeans.lib.lexer.TokenHierarchyOperation; -import org.netbeans.spi.lexer.MutableTextInput; /** * Shared information for all the token list changes @@ -66,7 +63,7 @@ private TokenChange tokenChange; - private final int modificationOffset; + private final int modOffset; private final int removedLength; @@ -97,7 +94,7 @@ this.tokenHierarchyOperation = tokenHierarchyOperation; this.type = type; - this.modificationOffset = modificationOffset; + this.modOffset = modificationOffset; this.removedLength = removedLength; this.removedText = removedText; this.insertedLength = insertedLength; @@ -142,8 +139,8 @@ } } - public int 
modificationOffset() { - return modificationOffset; + public int modOffset() { + return modOffset; } public int removedLength() { @@ -159,7 +156,11 @@ } public CharSequence insertedText() { - return currentText().subSequence(modificationOffset(), modificationOffset() + insertedLength()); + return currentText().subSequence(modOffset(), modOffset() + insertedLength()); + } + + public int diffLength() { + return insertedLength - removedLength; } /** @@ -181,7 +182,7 @@ ); } originalText = new OriginalText(currentText(), - modificationOffset, removedText, insertedLength); + modOffset, removedText, insertedLength); } return originalText; } @@ -193,8 +194,8 @@ public String modificationDescription(boolean detail) { StringBuilder sb = new StringBuilder(originalText().length() + 300); if (removedLength() > 0) { - sb.append("TEXT REMOVED <").append(modificationOffset()).append(","). // NOI18N - append(modificationOffset() + removedLength()).append('>'); + sb.append("TEXT REMOVED <").append(modOffset()).append(","). // NOI18N + append(modOffset() + removedLength()).append('>'); sb.append(':').append(removedLength()); if (removedText() != null) { sb.append(" \""); @@ -204,8 +205,8 @@ sb.append('\n'); } if (insertedLength() > 0) { - sb.append("TEXT INSERTED <").append(modificationOffset()).append(","). // NOI18N - append(modificationOffset() + insertedLength()).append(">:"). // NOI18N + sb.append("TEXT INSERTED <").append(modOffset()).append(","). // NOI18N + append(modOffset() + insertedLength()).append(">:"). // NOI18N append(insertedLength()).append(" \""); // NOI18N CharSequenceUtilities.debugText(sb, insertedText()); sb.append("\"\n"); @@ -214,7 +215,7 @@ sb.append("\n\n----------------- ORIGINAL TEXT -----------------\n" + // NOI18N originalText() + "\n----------------- BEFORE-CARET TEXT -----------------\n" + // NOI18N - originalText().subSequence(0, modificationOffset()) + + originalText().subSequence(0, modOffset()) + "|<--CARET\n" // NOI18N ); } @@ -234,7 +235,7 @@ public String toString() { StringBuilder sb = new StringBuilder(); sb.append("modOffset="); // NOI18N - sb.append(modificationOffset()); + sb.append(modOffset()); if (removedLength() > 0) { sb.append(", removedLength="); sb.append(removedLength()); diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/TokenHierarchyUpdate.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/inc/TokenHierarchyUpdate.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,525 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. 
If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.inc; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.lexer.EmbeddedTokenList; +import org.netbeans.lib.lexer.EmbeddingContainer; +import org.netbeans.lib.lexer.TokenHierarchyOperation; +import org.netbeans.lib.lexer.TokenList; +import org.netbeans.lib.lexer.TokenListList; + +/** + * Request for updating of token hierarchy after text modification + * or custom embedding creation/removal. + *
+ * This class contains all the data and methods related to the updating.
+ *
+ * @author Miloslav Metelka
+ */
+
+public final class TokenHierarchyUpdate {
+
+    // -J-Dorg.netbeans.lib.lexer.TokenHierarchyUpdate.level=FINE
+    static final Logger LOG = Logger.getLogger(TokenHierarchyUpdate.class.getName());
+
+    /**
+     * Special constant value to avoid a double map search for token list list updating.
+     */
+    private static final UpdateItem<?> NO_ITEM = new UpdateItem<TokenId>(null);
+
+    final TokenHierarchyEventInfo eventInfo;
+
+    /**
+     * Update items ordered from the higher (top) levels of the hierarchy to the lower levels.
+     * Useful for the top-down updating at the end.
+     */
+    private List<List<UpdateItem<?>>> itemLevels;
+
+    /**
+     * Mapping of LanguagePath to UpdateItem for joined ETLs.
+     */
+    private Map<LanguagePath,UpdateItem<?>> path2Item;
+
+    private LanguagePath lastPath2ItemPath;
+    private UpdateItem<?> lastPath2ItemItem;
+
+    public TokenHierarchyUpdate(TokenHierarchyEventInfo eventInfo) {
+        this.eventInfo = eventInfo;
+    }
+
+    public void update() {
+        TokenHierarchyOperation operation = eventInfo.tokenHierarchyOperation();
+        IncTokenList incTokenList = (IncTokenList) operation.rootTokenList();
+
+        if (LOG.isLoggable(Level.FINE)) {
+            if (LOG.isLoggable(Level.FINEST)) {
+                // Display the current state of the hierarchy by faking its text
+                // through the original text
+                CharSequence text = incTokenList.inputSourceText();
+                assert (text != null);
+                incTokenList.setInputSourceText(eventInfo.originalText());
+                // Dump all contents
+                LOG.finest(toString());
+                // Return the original text
+                incTokenList.setInputSourceText(text);
+            }
+
+            StringBuilder sb = new StringBuilder(150);
+            sb.append("<<<<<<<<<<<<<<<<<< LEXER CHANGE START ------------------\n"); // NOI18N
+            sb.append(eventInfo.modificationDescription(false));
+            TokenHierarchyUpdate.LOG.fine(sb.toString());
+        }
+
+        updateImpl(incTokenList, (operation.maxTokenListListPathSize() > 0));
+
+        if (LOG.isLoggable(Level.FINE)) {
+            LOG.fine("AFFECTED: " + eventInfo.dumpAffected() + "\n"); // NOI18N
+            String extraMsg = "";
+            if (LOG.isLoggable(Level.FINER)) {
+                // Check consistency of the whole token hierarchy
+                String error = operation.checkConsistency();
+                if (error != null) {
+                    String msg = "!!!CONSISTENCY-ERROR!!!: " + error + "\n";
+                    if (LOG.isLoggable(Level.FINEST)) {
+                        throw new IllegalStateException(msg);
+                    } else {
+                        LOG.finer(msg);
+                    }
+                } else {
+                    extraMsg = "(TokenHierarchy Check OK) ";
+                }
+            }
+            LOG.fine(">>>>>>>>>>>>>>>>>> LEXER CHANGE END " + extraMsg + "------------------\n"); // NOI18N
+        }
+
+        if (LOG.isLoggable(Level.FINEST)) {
+            LOG.finest("AFTER UPDATE:\n");
+            LOG.finest(toString());
+        }
+    }
+
+    private <T extends TokenId> void updateImpl(IncTokenList<T> incTokenList, boolean tllChildrenMayExist) {
+        incTokenList.incrementModCount();
+
+        // The update starts at the top language path and goes to the possibly embedded
+        // token lists (ETLs) based on the top-level change. If there are embeddings that
+        // join sections, a token-list-list (TLL) exists for the given language path that
+        // maintains all ETLs for the whole input source.
+        // 1. The updating must always go from the upper levels to the more embedded levels
+        //    of the token hierarchy to ensure that the tokens of the possibly joined ETLs
+        //    get updated properly, as the tokens created/removed at the upper levels may
+        //    contain embeddings that will need to be added/removed from the TLL of a more
+        //    embedded level.
+        // 2. A single insert/remove may produce token updates at several places in the
+        //    document due to the joining of ETLs. In turn the added/removed ETLs may
+        //    affect more embedded levels, so the update can affect multiple places of
+        //    the input source.
+        // 3. The algorithm must collect both removed and added ETLs and process them
+        //    prior to calling the TokenListUpdater to update the actual tokens.
+        // 4. For a removed ETL the updating must check and collect nested ETLs because
+        //    some embedded tokens of the removed ETL might contain another ETL that
+        //    might be maintained as a TLL.
+        // 5. Added ETLs must also be inspected for nested ETLs maintained in a TLL.
+        //    Initialization of the added ETLs is done when the particular level is
+        //    processed, because a TLL can join sections, so they must be lexed once the
+        //    definite additions and removals of ETLs are known. For non-joining ETLs
+        //    this could be done immediately, but it is not necessary, so it's done at
+        //    the same time as well.
+        // 6. For all TLLs their parent TLLs (for the language path with the last
+        //    language stripped) are also maintained mandatorily.
+        // 7. The algorithm maintains "item levels" to respect the top-down processing
+        //    according to the language-path depth.
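Point 7 of the comment above is realized by the itemLevels structure together with the addItem() and processLevelInfos() methods that follow; the control flow amounts to the small worklist below, shown with invented names as a sketch of the idea only:

import java.util.ArrayList;
import java.util.List;

final class LevelWorklistSketch {
    interface Item {
        void process(LevelWorklistSketch worklist); // may call addItem() for deeper levels
    }

    private final List<List<Item>> itemLevels = new ArrayList<List<Item>>();

    void addItem(Item item, int level) {
        while (level >= itemLevels.size()) {
            itemLevels.add(new ArrayList<Item>(3));
        }
        itemLevels.get(level).add(item);
    }

    void processLevels() {
        // Iterate levels by index: processing an item may append deeper levels,
        // so itemLevels.size() can grow while the loop runs.
        for (int level = 0; level < itemLevels.size(); level++) {
            for (Item item : itemLevels.get(level)) {
                item.process(this);
            }
        }
    }
}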
+        itemLevels = new ArrayList<List<UpdateItem<?>>>(3); // Suffices for a two-level embedding without realloc
+        // Create the root item first for the root token list
+        UpdateItem<T> rootItem = new UpdateItem<T>(this);
+        rootItem.tokenListChange = new TokenListChange<T>(incTokenList);
+        rootItem.tllChildrenMayExist = tllChildrenMayExist;
+        addItem(rootItem, 0);
+        processLevelInfos();
+    }
+
+    public <T extends TokenId> void updateCreateOrRemoveEmbedding(EmbeddedTokenList<T> addedOrRemovedTokenList, boolean add) {
+        LanguagePath languagePath = addedOrRemovedTokenList.languagePath();
+        int level = languagePath.size() - 1;
+        itemLevels = new ArrayList<List<UpdateItem<?>>>(level + 2); // One extra level for growth
+        UpdateItem<T> item = tokenListListItem(languagePath);
+        if (item != null) {
+            if (LOG.isLoggable(Level.FINE)) {
+                LOG.fine("THU.updateCreateOrRemoveEmbedding() add=" + add + ": " + addedOrRemovedTokenList.dumpInfo(null));
+            }
+            if (add) {
+                item.tokenListListUpdate.markAddedMember(addedOrRemovedTokenList);
+            } else {
+                item.tokenListListUpdate.markRemovedMember(addedOrRemovedTokenList, eventInfo);
+            }
+            processLevelInfos();
+        }
+    }
+
+    private void processLevelInfos() {
+        // Process the item levels; processing can extend the list by new items at the
+        // same level or in the next levels. Therefore iterate by INDEX since size() may change.
+        for (int i = 0; i < itemLevels.size(); i++) {
+            List<UpdateItem<?>> items = itemLevels.get(i);
+            // The "items" list should not be extended by additional items dynamically during iteration.
+            for (UpdateItem<?> item : items) {
+                item.update();
+            }
+        }
+    }
+
+    void addItem(UpdateItem<?> item, int level) {
+        while (level >= itemLevels.size()) {
+            itemLevels.add(new ArrayList<UpdateItem<?>>(3));
+        }
+        List<UpdateItem<?>> items = itemLevels.get(level);
+        items.add(item);
+    }
+
+    <T extends TokenId> void collectAddedRemovedEmbeddings(TokenListChange<T> change) {
+        // Only called when TLL children exist.
+        // First collect the removed embeddings
+        TokenList<T> removedTokenList = change.tokenChangeInfo().removedTokenList();
+        if (removedTokenList != null) {
+            collectRemovedEmbeddings(removedTokenList);
+        }
+        // Now collect the added embeddings
+        TokenList<T> currentTokenList = change.tokenList();
+        collectAddedEmbeddings(currentTokenList, change.index(), change.addedTokenOrEmbeddingsCount());
+    }
+
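The collectRemovedEmbeddings()/collectAddedEmbeddings() methods that follow both walk, for every affected token, the linked chain of that token's embedded token lists via firstEmbeddedTokenList()/nextEmbeddedTokenList(). The traversal shape, reduced to a sketch with hypothetical names:

import java.util.ArrayList;
import java.util.List;

final class EmbeddingChainSketch {
    static final class EtlSketch {
        final String languagePath; // identifies the embedded language
        EtlSketch next;            // next embedding hanging off the same token
        EtlSketch(String languagePath) { this.languagePath = languagePath; }
    }

    // Collect the language paths of all embeddings of a single token.
    static List<String> collect(EtlSketch first) {
        List<String> paths = new ArrayList<String>();
        for (EtlSketch etl = first; etl != null; etl = etl.next) {
            paths.add(etl.languagePath);
        }
        return paths;
    }

    public static void main(String[] args) {
        EtlSketch html = new EtlSketch("text/x-java/text/html");
        html.next = new EtlSketch("text/x-java/text/x-javadoc");
        System.out.println(collect(html)); // two embeddings on one token
    }
}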
+ */ + void collectRemovedEmbeddings(TokenList removedTokenList) { + int tokenCount = removedTokenList.tokenCountCurrent(); + for (int i = 0; i < tokenCount; i++) { // Must go from first to last + EmbeddingContainer ec = removedTokenList.tokenOrEmbedding(i).embedding(); + if (ec != null) { + ec.updateStatusUnsync(); // Update status since markRemoved() will need it + EmbeddedTokenList etl = ec.firstEmbeddedTokenList(); + while (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING) { + internalMarkAddedRemovedMember(etl, false); + etl = etl.nextEmbeddedTokenList(); + } + } + } + } + + void collectAddedEmbeddings(TokenList tokenList, int index, int addedCount) { + for (int i = 0; i < addedCount; i++) { + // Ensure that the default embedding gets possibly created + EmbeddedTokenList etl = EmbeddingContainer.embeddedTokenList(tokenList, index + i, null, false); + while (etl != null) { + internalMarkAddedRemovedMember(etl, true); + etl = etl.nextEmbeddedTokenList(); + } + } + } + + /** + * This code is extracted from collectAdded/RemovedEmbeddings() for convenient generification + * over a type ET. + */ + private void internalMarkAddedRemovedMember(EmbeddedTokenList etl, boolean add) { + UpdateItem item = tokenListListItem(etl.languagePath()); + if (item != null) { + // update-status called in caller + if (add) { + item.tokenListListUpdate.markAddedMember(etl); + } else { + item.tokenListListUpdate.markRemovedMember(etl, eventInfo); + } + } + } + + /** + * Return tll info or null if the token list list is not maintained + * for the given language path. + */ + private UpdateItem tokenListListItem(LanguagePath languagePath) { + if (languagePath == lastPath2ItemPath) { // Use last queried one + @SuppressWarnings("unchecked") + UpdateItem item = (UpdateItem) lastPath2ItemItem; + return item; + + } else { // Not last returned item + if (path2Item == null) { // Init since it will contain either target item or noInfo() + path2Item = new HashMap>(4, 0.5f); + } + @SuppressWarnings("unchecked") + UpdateItem item = (UpdateItem)path2Item.get(languagePath); + if (item == NO_ITEM) { // Marker value for null (to query just single map - this one) + item = null; + } else if (item == null) { + TokenListList tokenListList = eventInfo.tokenHierarchyOperation().existingTokenListList(languagePath); + if (tokenListList != null) { + item = new UpdateItem(this); + item.setTokenListList(tokenListList); + item.tllChildrenMayExist = tokenListList.hasChildren(); + int level = languagePath.size() - 1; + addItem(item, level); // Add item to be scheduled for processing + path2Item.put(languagePath, item); + } else { // Use NO_ITEM marker value to immediately know that there's no tokenListList for the given LP + path2Item.put(languagePath, NO_ITEM); // NO_ITEM is of type UpdateItem + } + } // else - regular valid item + lastPath2ItemItem = item; // Remember unmasked value i.e. "null" directly + return item; + } + } + + /** + * Information about update in a particular token list or a particular token list list. + */ + static final class UpdateItem { + + private static final EmbeddedTokenList[] EMPTY_ETL_ARRAY = new EmbeddedTokenList[0]; + + final TokenHierarchyUpdate update; + + UpdateItem parentItem; + + /** + * Token list change performed during this update. 
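The path2Item lookup in tokenListListItem() above combines a last-query cache with a NO_ITEM sentinel, so that a single map search answers both "known to be absent" and "not queried yet". A standalone rendering of that caching pattern (the SentinelCache name and shape are hypothetical; the real code works with LanguagePath keys):

    import java.util.HashMap;
    import java.util.Map;

    // A loader-backed map that caches negative answers under a sentinel value
    // and remembers the last key/value pair to skip repeated map searches.
    abstract class SentinelCache<K, V> {
        private final V noValue; // sentinel standing in for a cached null
        private final Map<K, V> cache = new HashMap<K, V>(4, 0.5f);
        private K lastKey;
        private V lastValue;

        protected SentinelCache(V noValue) { this.noValue = noValue; }

        protected abstract V load(K key); // may return null

        public final V get(K key) {
            if (key == lastKey) { // identity check, like the LanguagePath cache
                return lastValue;
            }
            V value = cache.get(key);
            if (value == noValue) {
                value = null;          // negative result was cached earlier
            } else if (value == null) {
                value = load(key);     // first query for this key
                cache.put(key, (value != null) ? value : noValue);
            }
            lastKey = key;
            lastValue = value; // remember the unmasked value, i.e. possibly null
            return value;
        }
    }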
+ */ + TokenListChange tokenListChange; + + TokenListListUpdate tokenListListUpdate; + + boolean tllChildrenMayExist; + + public UpdateItem(TokenHierarchyUpdate update) { + this.update = update; + } + + void setParentItem(UpdateItem parentItem) { + assert (this.parentItem == null); + this.parentItem = parentItem; + } + + void setTokenListList(TokenListList tokenListList) { + this.tokenListListUpdate = new TokenListListUpdate(tokenListList); + } + + void initTokenListChange(EmbeddedTokenList etl) { + assert (tokenListChange == null); + if (tokenListListUpdate != null) { + // ETL managed by a TokenListList. If the TLL joins sections + // then a JoinTokenListChange needs to be created. + tokenListChange = tokenListListUpdate.createTokenListChange(etl); + + } else { // No child managed by TLL but want to process nested possible bounds changes as deep as possible + // Perform change in child - it surely does not join the sections + // since otherwise the tllItem could not be null. + // Token list change is surely non-join since there is no TLLInfo + tokenListChange = new TokenListChange(etl); + } + } + + /** + * Update token list(s) after added and removed embedded token lists + * are known and in place. + */ + void update() { + TokenHierarchyEventInfo eventInfo = update.eventInfo; + if (tokenListChange == null) { // Joined or unjoined ETLs + assert (tokenListListUpdate != null); + if (tokenListListUpdate.tokenListList.joinSections()) { + tokenListChange = tokenListListUpdate.createJoinTokenListChange(); + } + } // else tokenListChange != null + + // Use always non-null List for added token lists + if (tokenListListUpdate != null && tokenListListUpdate.addedTokenLists == null) { + tokenListListUpdate.addedTokenLists = Collections.emptyList(); + } + + // Process the token list change by calling token list updater + if (tokenListChange != null) { // Updating a concrete token list as a bounds change or joined change + if (tokenListChange.getClass() == JoinTokenListChange.class) { + JoinTokenListChange jChange = (JoinTokenListChange) tokenListChange; + assert (tokenListListUpdate != null); + assert (tokenListListUpdate.modTokenListIndex != -1); + jChange.setTokenListListUpdate(tokenListListUpdate); + TokenListUpdater.updateJoined(jChange, eventInfo); + + } else { // non-joined update + TokenListUpdater.updateRegular(tokenListChange, eventInfo); + if (parentItem == null) { + eventInfo.setTokenChangeInfo(tokenListChange.tokenChangeInfo()); + } + } + + // Possibly process bounds change + if (tokenListChange.isBoundsChange()) { + TokenListChange change; + if (tokenListChange.getClass() == JoinTokenListChange.class) { + // Process the one embedded change + JoinTokenListChange jChange = (JoinTokenListChange) tokenListChange; + assert (jChange.relexChanges().size() == 1); + change = jChange.relexChanges().get(0); + } else { + change = tokenListChange; + } + processBoundsChange(change); + + } else { // Non-bounds change + // Mark changed area based on start of first mod.token and end of last mod.token + // of the root-level change + eventInfo.setMinAffectedStartOffset(tokenListChange.offset()); + eventInfo.setMaxAffectedEndOffset(tokenListChange.addedEndOffset()); + if (tllChildrenMayExist) { // If there are any possible embedded changes with TokenListList + if (tokenListChange.getClass() == JoinTokenListChange.class) { + JoinTokenListChange jChange = (JoinTokenListChange) tokenListChange; + List> relexChanges = jChange.relexChanges(); + for (TokenListChange change : relexChanges) { + 
update.collectAddedRemovedEmbeddings(change); // Process individual changes
+                        }
+                    } else {
+                        update.collectAddedRemovedEmbeddings(tokenListChange);
+                    }
+                } // else: there is no embedding with TLL; existing ETLs will be abandoned; new one created on demand
+            }
+
+        } else if (tokenListListUpdate != null) { // Only service added/removed ETLs
+            tokenListListUpdate.addRemoveTokenLists(update, tllChildrenMayExist);
+        }
+    }
+
+    /**
+     * Process a change where just a single token was relexed and it's the same
+     * one, just with updated bounds.
+     *
+     * @param change non-null change describing the bounds change.
+     */
+    void processBoundsChange(TokenListChange change) {
+        // Add an embedded change to the parent change (if it exists)
+        if (parentItem != null) {
+            parentItem.tokenListChange.tokenChangeInfo().addEmbeddedChange(change.tokenChangeInfo());
+        }
+        // Go through all embedded lists in the chain and check whether the embeddings are OK
+        EmbeddingContainer ec = change.tokenChangeInfo().removedTokenList().tokenOrEmbedding(0).embedding();
+        if (ec != null) { // The only removed token had embeddings
+            // Rewrap token in ec - use the added token
+            ec.reinit(change.addedToken(0));
+            ec.updateStatusUnsync();
+            change.tokenList().wrapToken(change.index(), ec);
+            // Go through all ETLs and check whether chars in start/end skip lengths weren't modified
+            EmbeddedTokenList etl = ec.firstEmbeddedTokenList();
+            if (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING) {
+                // Check the text length beyond modification => end skip length must not be affected
+                TokenHierarchyEventInfo eventInfo = update.eventInfo;
+                int modRelOffset = eventInfo.modOffset() - change.offset();
+                int beyondModLength = change.addedEndOffset() - (eventInfo.modOffset() + eventInfo.diffLengthOrZero());
+                EmbeddedTokenList prevEtl = null;
+                do {
+                    // Check whether chars in start/end skip lengths weren't modified
+                    if (processBoundsChangeEmbeddedTokenList(etl, modRelOffset, beyondModLength)) { // Embedding saved -> proceed to next ETL
+                        prevEtl = etl;
+                        etl = prevEtl.nextEmbeddedTokenList();
+                    } else {
+                        etl = ec.removeEmbeddedTokenList(prevEtl, etl);
+                    }
+                } while (etl != null && etl != EmbeddedTokenList.NO_DEFAULT_EMBEDDING);
+            }
+        }
+    }
+
+    /**
+     * This code is extracted from processBoundsChange() to allow a separate generification
+     * for each ETL contained in an EC.
+     *
+     * @param etl embedded token list hosted by the rewrapped embedding container.
+     * @param modRelOffset modification offset relative to the begin offset of the relexed token.
+     * @param beyondModLength text length between the end of the modification and the token's end offset.
+     * @return true if the embedding should be saved or false if it should be removed.
+     */
+    private boolean processBoundsChangeEmbeddedTokenList(
+            EmbeddedTokenList etl, int modRelOffset, int beyondModLength
+    ) {
+        UpdateItem childItem = tllChildrenMayExist
+                ? 
update.tokenListListItem(etl.languagePath()) + : null; + // Check whether the change was not in the start or end skip lengths + // and if so then remove the embedding + if (modRelOffset >= etl.embedding().startSkipLength() && beyondModLength >= etl.embedding().endSkipLength()) { + // Modification within embedding's bounds => embedding can stay + // Embedding will be updated once the level gets processed + if (childItem == null) { + childItem = new UpdateItem(update); + int level = etl.languagePath().size() - 1; + update.addItem(childItem, level); + } else { // TokenListList exists - item already added + // Mark a bounds change + childItem.tokenListListUpdate.markChangedMember(etl); + } + childItem.setParentItem(this); + childItem.initTokenListChange(etl); + return true; // Embedding saved -> proceed with next + + } else { // Mod in start/stop skip length => Remove the etl from chain + if (childItem != null) { + // update-status already done as part of rewrap-token + childItem.tokenListListUpdate.markRemovedMember(etl, update.eventInfo); + } + // Signal to remove embedding + return false; + } + } + + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/TokenListChange.java --- a/lexer/src/org/netbeans/lib/lexer/inc/TokenListChange.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/inc/TokenListChange.java Wed May 28 14:48:55 2008 +0200 @@ -50,6 +50,7 @@ import org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.lib.lexer.TokenList; import org.netbeans.lib.lexer.token.AbstractToken; +import org.netbeans.lib.lexer.TokenOrEmbedding; /** * Description of the change in a token list. @@ -65,22 +66,31 @@ * @version 1.00 */ -public final class TokenListChange { +public class TokenListChange { + + public static TokenListChange createRebuildChange(MutableTokenList tokenList) { + TokenListChange change = new TokenListChange(tokenList); +// change.setIndex(0); +// change.setOffset(0); +// change.addedEndOffset = 0; // Tokens will be recreated lazily + change.matchIndex = tokenList.tokenCountCurrent(); // All tokens removed + return change; + } private final TokenChangeInfo tokenChangeInfo; /** * The list may store either tokens or branches as well. 
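The elements stored here follow a small union pattern: every element answers token(), and only a branch element answers a non-null embedding(). A minimal hypothetical model of such an element type:

    // Hypothetical model of a list element that is either a plain token or a
    // token carrying an embedding ("branch"); both answer token() uniformly.
    interface TokenOrEmbeddingSketch {
        Token token();          // non-null for both element kinds
        Embedding embedding();  // null for a plain token

        class Token implements TokenOrEmbeddingSketch {
            public Token token() { return this; }
            public Embedding embedding() { return null; }
        }

        class Embedding implements TokenOrEmbeddingSketch {
            private final Token branchToken;
            Embedding(Token branchToken) { this.branchToken = branchToken; }
            public Token token() { return branchToken; }
            public Embedding embedding() { return this; }
        }
    }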
*/ - private List addedTokensOrBranches; + private List> addedTokenOrEmbeddings; private LAState laState; - - private int offsetGapIndex; - private int removedEndOffset; + int removedEndOffset; - private int addedEndOffset; + protected int matchIndex; + + protected int matchOffset; // Works like addedEndOffset public TokenListChange(MutableTokenList tokenList) { tokenChangeInfo = new TokenChangeInfo(tokenList); @@ -94,16 +104,29 @@ return (MutableTokenList)tokenChangeInfo.currentTokenList(); } + public void setMatchIndex(int matchIndex) { + this.matchIndex = matchIndex; + } + + public void setMatchOffset(int matchOffset) { + this.matchOffset = matchOffset; + } + + public int increaseMatchIndex() { + matchOffset += tokenList().tokenOrEmbeddingUnsync(matchIndex++).token().length(); + return matchOffset; + } + public LanguagePath languagePath() { return tokenList().languagePath(); } - + public int index() { return tokenChangeInfo.index(); } - public void setIndex(int tokenIndex) { - tokenChangeInfo.setIndex(tokenIndex); + public void setIndex(int index) { + tokenChangeInfo.setIndex(index); } public int offset() { @@ -113,65 +136,59 @@ public void setOffset(int offset) { tokenChangeInfo.setOffset(offset); } - - public int offsetGapIndex() { - return offsetGapIndex; + + public int removedTokenCount() { + return matchIndex - index(); } - public void setOffsetGapIndex(int offsetGapIndex) { - this.offsetGapIndex = offsetGapIndex; + public int removedEndOffset() { + return matchOffset; // In after-mod coordinates + } + + public int addedEndOffset() { + return matchOffset; } public void addToken(AbstractToken token, int lookahead, Object state) { - if (addedTokensOrBranches == null) { - addedTokensOrBranches = new ArrayList(2); + if (addedTokenOrEmbeddings == null) { + addedTokenOrEmbeddings = new ArrayList>(2); laState = LAState.empty(); } - addedTokensOrBranches.add(token); + addedTokenOrEmbeddings.add(token); laState = laState.add(lookahead, state); } - public List addedTokensOrBranches() { - return addedTokensOrBranches; + public List> addedTokenOrEmbeddings() { + return addedTokenOrEmbeddings; } - public int addedTokensOrBranchesCount() { - return (addedTokensOrBranches != null) ? addedTokensOrBranches.size() : 0; + public int addedTokenOrEmbeddingsCount() { + return (addedTokenOrEmbeddings != null) ? addedTokenOrEmbeddings.size() : 0; } - public void removeLastAddedToken() { - int lastIndex = addedTokensOrBranches.size() - 1; - addedTokensOrBranches.remove(lastIndex); + /** + * @return end offset of previous (retained) token. 
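How the match bookkeeping above is typically driven can be modeled with plain arrays: the match point is first pushed past all tokens overlapping the removed text, token by token, and then shifted into after-modification coordinates. An illustrative sketch, not the actual updater code:

    // Plain-array model of the match-point arithmetic: tokenLengths holds the
    // original tokens' lengths; matchOffset starts as the begin offset of the
    // token at matchIndex. Returns the updated {matchIndex, matchOffset}.
    class MatchPointSketch {
        static int[] skipRemovedArea(int[] tokenLengths, int matchIndex, int matchOffset,
                int modOffset, int removedLength, int insertedLength) {
            if (removedLength > 0) { // consume every token overlapping the removed text
                matchOffset += tokenLengths[matchIndex++];
                while (matchOffset < modOffset + removedLength && matchIndex < tokenLengths.length) {
                    matchOffset += tokenLengths[matchIndex++];
                }
            } else if (matchOffset < modOffset) { // insert inside a token: match after it
                matchOffset += tokenLengths[matchIndex++];
            }
            matchOffset += insertedLength - removedLength; // after-mod coordinates
            return new int[] { matchIndex, matchOffset };
        }
    }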
+ */ + public AbstractToken removeLastAddedToken() { + int lastIndex = addedTokenOrEmbeddings.size() - 1; + AbstractToken token = addedTokenOrEmbeddings.remove(lastIndex).token(); laState.remove(lastIndex, 1); + matchIndex--; + matchOffset -= token.length(); + return token; } public AbstractToken addedToken(int index) { - return LexerUtilsConstants.token(addedTokensOrBranches.get(0)); + return addedTokenOrEmbeddings.get(0).token(); } public void syncAddedTokenCount() { - tokenChangeInfo.setAddedTokenCount(addedTokensOrBranches.size()); + tokenChangeInfo.setAddedTokenCount(addedTokenOrEmbeddings.size()); } - public void setRemovedTokens(Object[] removedTokensOrBranches) { + public void setRemovedTokens(TokenOrEmbedding[] removedTokensOrBranches) { tokenChangeInfo.setRemovedTokenList(new RemovedTokenList( languagePath(), removedTokensOrBranches)); - } - - public int removedEndOffset() { - return removedEndOffset; - } - - public void setRemovedEndOffset(int removedEndOffset) { - this.removedEndOffset = removedEndOffset; - } - - public int addedEndOffset() { - return addedEndOffset; - } - - public void setAddedEndOffset(int addedEndOffset) { - this.addedEndOffset = addedEndOffset; } public boolean isBoundsChange() { @@ -188,42 +205,44 @@ @Override public String toString() { - return toString(0); + StringBuilder sb = new StringBuilder(); + sb.append('"').append(languagePath().innerLanguage().mimeType()); + sb.append("\", ind=").append(index()); + sb.append(", off=").append(offset()); + sb.append(", mInd=").append(matchIndex); + sb.append(", mOff=").append(matchOffset); + sb.append(", Add:").append(addedTokenOrEmbeddingsCount()); + sb.append(", tCnt=").append(tokenList().tokenCountCurrent()); + if (isBoundsChange()) { + sb.append(", BoChan"); + } + return sb.toString(); } - - public String toString(int indent) { + + public String toStringMods(int indent) { StringBuilder sb = new StringBuilder(); - sb.append('"'); - sb.append(languagePath().innerLanguage().mimeType()); - sb.append("\", index="); - sb.append(index()); - sb.append(", offset="); - sb.append(offset()); - if (isBoundsChange()) { - sb.append(", boundsChange"); - } TokenList removedTL = tokenChangeInfo.removedTokenList(); if (removedTL != null && removedTL.tokenCount() > 0) { int digitCount = ArrayUtilities.digitCount(removedTL.tokenCount() - 1); for (int i = 0; i < removedTL.tokenCount(); i++) { sb.append('\n'); ArrayUtilities.appendSpaces(sb, indent); - sb.append("R["); + sb.append("Rem["); ArrayUtilities.appendIndex(sb, i, digitCount); sb.append("]: "); - LexerUtilsConstants.appendTokenInfo(sb, removedTL, i, null, false, 0); + LexerUtilsConstants.appendTokenInfo(sb, removedTL, i, null, false, 0, true); } } - if (addedTokensOrBranches() != null) { - int digitCount = ArrayUtilities.digitCount(addedTokensOrBranches().size() - 1); - for (int i = 0; i < addedTokensOrBranches().size(); i++) { + if (addedTokenOrEmbeddings() != null) { + int digitCount = ArrayUtilities.digitCount(addedTokenOrEmbeddings().size() - 1); + for (int i = 0; i < addedTokenOrEmbeddings().size(); i++) { sb.append('\n'); ArrayUtilities.appendSpaces(sb, indent); - sb.append("A["); + sb.append("Add["); ArrayUtilities.appendIndex(sb, i, digitCount); sb.append("]: "); - LexerUtilsConstants.appendTokenInfo(sb, addedTokensOrBranches.get(i), - laState.lookahead(i), laState.state(i), null, false, 0); + LexerUtilsConstants.appendTokenInfo(sb, addedTokenOrEmbeddings.get(i), + laState.lookahead(i), laState.state(i), null, false, 0, true); } } return sb.toString(); diff -r 
06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/TokenListListUpdate.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/inc/TokenListListUpdate.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,208 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.inc; + +import java.util.ArrayList; +import java.util.List; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.lexer.EmbeddedTokenList; +import org.netbeans.lib.lexer.JoinTokenList; +import org.netbeans.lib.lexer.TokenList; +import org.netbeans.lib.lexer.TokenListList; + +/** + * Change of a particular TokenListList. + * + * @author Miloslav Metelka + */ + +final class TokenListListUpdate { + + /** + * Token list list for the case when the particular language path + * corresponds to joined. 
+     */
+    final TokenListList tokenListList;
+
+    int modTokenListIndex;
+
+    int removedTokenListCount;
+
+    List> addedTokenLists;
+
+    TokenListListUpdate(TokenListList tokenListList) {
+        this.tokenListList = tokenListList;
+        this.modTokenListIndex = -1;
+    }
+
+    public boolean isTokenListsMod() { // If any ETL was removed/added
+        return (removedTokenListCount != 0) || addedTokenLists.size() > 0;
+    }
+
+    public int tokenListCountDiff() {
+        return addedTokenLists.size() - removedTokenListCount;
+    }
+
+    public EmbeddedTokenList afterUpdateTokenList(JoinTokenList jtl, int tokenListIndex) {
+        EmbeddedTokenList etl;
+        if (tokenListIndex < modTokenListIndex) {
+            etl = jtl.tokenList(tokenListIndex);
+        } else if (tokenListIndex - modTokenListIndex < addedTokenLists.size()) {
+            etl = addedTokenLists.get(tokenListIndex - modTokenListIndex);
+        } else { // Last part after removed and added
+            etl = jtl.tokenList(tokenListIndex - addedTokenLists.size() + removedTokenListCount);
+        }
+        return etl;
+    }
+
+    protected int afterUpdateTokenListCount(JoinTokenList jtl) {
+        return jtl.tokenListCount() - removedTokenListCount + addedTokenLists.size();
+    }
+
+    void markChangedMember(EmbeddedTokenList changedTokenList) {
+        assert (modTokenListIndex == -1);
+        modTokenListIndex = tokenListList.findIndex(changedTokenList.startOffset());
+        assert (tokenListList.get(modTokenListIndex) == changedTokenList);
+    }
+
+    void markChangeBetween(int offset) { // Nothing added/removed and mod outside of bounds of an ETL
+        assert (modTokenListIndex == -1);
+        modTokenListIndex = tokenListList.findIndex(offset);
+    }
+
+    /**
+     * Mark the given token list as removed in the token list list.
+     * All removed token lists should be marked one after another in order
+     * of their increasing offsets, so the index only needs to be searched once.
+     *
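Once some ETLs are marked removed and others added, indices in the after-update arrangement map back to the original list by simple splice arithmetic, as afterUpdateTokenList() does above. A self-contained check of that index math (names illustrative only):

    // Maps an index in the after-update arrangement back to the original list:
    // indices before the modification point are unchanged, indices inside the
    // added area address the added lists (-1 here), and indices past it shift
    // by the removed/added count difference.
    class SpliceIndexMath {
        static int remapToOriginal(int index, int modIndex, int removedCount, int addedCount) {
            if (index < modIndex) {
                return index;                              // before the modified area
            } else if (index - modIndex < addedCount) {
                return -1;                                 // falls into the added lists
            } else {
                return index - addedCount + removedCount;  // after the spliced area
            }
        }

        public static void main(String[] args) {
            // 10 lists, 3 removed at index 4, 2 added: after-update count is 9.
            assert remapToOriginal(3, 4, 3, 2) == 3;  // untouched prefix
            assert remapToOriginal(4, 4, 3, 2) == -1; // first added list
            assert remapToOriginal(6, 4, 3, 2) == 7;  // first retained suffix list
        }
    }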
+ * It's expected that updateStatusImpl() was already called + * on the corresponding embedding container. + */ + void markRemovedMember(EmbeddedTokenList removedTokenList, TokenHierarchyEventInfo eventInfo) { + boolean indexWasMinusOne; // Used for possible exception cause debugging +// removedTokenList.embeddingContainer().checkStatusUpdated(); + if (modTokenListIndex == -1) { + indexWasMinusOne = true; + modTokenListIndex = tokenListList.findIndexDuringUpdate(removedTokenList, eventInfo); + assert (modTokenListIndex >= 0) : "tokenListIndex=" + modTokenListIndex + " < 0"; // NOI18N + } else { // tokenListIndex already initialized + indexWasMinusOne = false; + } + TokenList markedForRemoveTokenList = tokenListList.getOrNull(modTokenListIndex + removedTokenListCount); + if (markedForRemoveTokenList != removedTokenList) { + int realIndex = tokenListList.indexOf(removedTokenList); + throw new IllegalStateException("Removing at tokenListIndex=" + modTokenListIndex + // NOI18N + " but real tokenListIndex is " + realIndex + // NOI18N + " (indexWasMinusOne=" + indexWasMinusOne + ").\n" + // NOI18N + "Wishing to remove tokenList\n" + removedTokenList + // NOI18N + "\nbut marked-for-remove tokenList is \n" + markedForRemoveTokenList + // NOI18N + "\nfrom tokenListList\n" + tokenListList + // NOI18N + "\n\nModification description:\n" + eventInfo.modificationDescription(true) // NOI18N + ); + } + removedTokenListCount++; + } + + /** + * Mark the given token list to be added to this list of token lists. + * At the end first the token lists marked for removal will be removed + * and then the token lists marked for addition will be added. + *
+ * It's expected that updateStatusImpl() was already called + * on the corresponding embedding container. + */ + void markAddedMember(EmbeddedTokenList addedTokenList) { +// addedTokenList.embeddingContainer().checkStatusUpdated(); + if (addedTokenLists == null) { + if (modTokenListIndex == -1) { + modTokenListIndex = tokenListList.findIndex(addedTokenList.startOffset()); + assert (modTokenListIndex >= 0) : "tokenListIndex=" + modTokenListIndex + " < 0"; // NOI18N + } + addedTokenLists = new ArrayList>(4); + } + addedTokenLists.add(addedTokenList); + } + + void addRemoveTokenLists(TokenHierarchyUpdate update, boolean tllChildrenMayExist) { + assert (removedTokenListCount > 0 || addedTokenLists != null); + EmbeddedTokenList[] removedTokenLists = tokenListList.replace( + modTokenListIndex, removedTokenListCount, addedTokenLists); + if (tllChildrenMayExist) { + for (int i = 0; i < removedTokenLists.length; i++) { + update.collectRemovedEmbeddings(removedTokenLists[i]); + } + for (int i = 0; i < addedTokenLists.size(); i++) { + EmbeddedTokenList addedEtl = addedTokenLists.get(i); + update.collectAddedEmbeddings(addedEtl, 0, addedEtl.tokenCountCurrent()); + } + } + } + + TokenListChange createTokenListChange(EmbeddedTokenList etl) { + assert (etl != null); + TokenListChange etlTokenListChange; + if (tokenListList.joinSections()) { + MutableJoinTokenList jtl = MutableJoinTokenList.create(tokenListList, modTokenListIndex, etl); + etlTokenListChange = new JoinTokenListChange(jtl); + } else { // Non-joining + etlTokenListChange = new TokenListChange(etl); + } + return etlTokenListChange; + } + + TokenListChange createJoinTokenListChange() { + assert (tokenListList.joinSections()); + // In case when adding at jtl.tokenListCount() a last ETL must be used + int etlIndex = Math.min(modTokenListIndex, tokenListList.size() - 1); + MutableJoinTokenList jtl = MutableJoinTokenList.create(tokenListList, etlIndex, tokenListList.get(etlIndex)); + return new JoinTokenListChange(jtl); + } + + @Override + public String toString() { + return " modTokenListIndex=" + modTokenListIndex + // NOI18N + "; Rem:" + removedTokenListCount + // NOI18N + " Add:" + addedTokenLists.size() + // NOI18N + " Size:" + tokenListList.size(); // NOI18N + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/inc/TokenListUpdater.java --- a/lexer/src/org/netbeans/lib/lexer/inc/TokenListUpdater.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/inc/TokenListUpdater.java Wed May 28 14:48:55 2008 +0200 @@ -44,15 +44,19 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.LanguageOperation; +import org.netbeans.lib.editor.util.CharSequenceUtilities; +import org.netbeans.lib.lexer.EmbeddedJoinInfo; +import org.netbeans.lib.lexer.EmbeddedTokenList; +import org.netbeans.lib.lexer.JoinLexerInputOperation; +import org.netbeans.lib.lexer.JoinTokenList; import org.netbeans.lib.lexer.LexerInputOperation; import org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.lib.lexer.token.AbstractToken; -import org.netbeans.spi.lexer.TokenValidator; +import org.netbeans.lib.lexer.token.PartToken; /** - * Token updater fixes a list of tokens constructed for a document + * Token list updater fixes a list of tokens constructed for a document * after text of the document gets modified. *
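Locating the token that contains the modification offset is a binary search over token start offsets (updateRegular() below delegates this to LexerUtilsConstants.tokenIndexBinSearch()). A compact model of such a lookup, simplified to an array of start offsets:

    // Returns {index, tokenStartOffset} of the token containing the offset;
    // index is -1 when there is no token starting at or below the offset.
    class TokenBinSearch {
        static int[] tokenIndexBinSearch(int[] tokenStartOffsets, int offset) {
            int low = 0;
            int high = tokenStartOffsets.length - 1;
            while (low <= high) {
                int mid = (low + high) >>> 1; // avoids int overflow on large indices
                int midStart = tokenStartOffsets[mid];
                if (midStart < offset) {
                    low = mid + 1;
                } else if (midStart > offset) {
                    high = mid - 1;
                } else {
                    return new int[] { mid, midStart }; // token starts exactly at offset
                }
            }
            // 'high' is the last token starting below the offset (-1 if none).
            return new int[] { high, (high >= 0) ? tokenStartOffsets[high] : 0 };
        }
    }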
* Subclasses need to define all the abstract methods @@ -62,25 +66,25 @@ * Updater looks similar to list iterator * but there are differences in the semantics * of iterator's modification operations. - *

+ *
* The algorithm used in the {@link #update(int, int)} * is based on "General Incremental Lexical Analysis" written * by Tim A. Wagner and Susan L. Graham, University * of California, Berkeley. It's available online * at * twagner-lexing.pdf. - *
+ *
* Ending EOF token is not used but the lookahead * of the ending token(s) is increased by one (past the end of the input) * if they have reached the EOF. - *
+ *
* Non-startable tokens are not supported. - *
+ *
* When updating a token with lookback one as a result * of modification the lookahead of the preceding token is inspected * to find out whether the modification has really affected it. * This can often save the previous token from being relexed. - *
+ *
* Currently the algorithm computes the lookback values on the fly * and it does not store the lookback in the tokens. For typical languages * the lookback is reasonably small (0, 1 or 2) so it's usually not worth @@ -88,19 +92,30 @@ * There would also be an additional overhead of updating the lookback * values in the tokens after the modification and the algorithm code would * be somewhat less readable. + *
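The lookahead values drive the decision of how far back relexing must start: a preceding token is affected whenever its recognition looked past the modification offset. A small array-based model of that back-off step:

    // relexIndex/relexOffset initially address the token containing the edit;
    // a preceding token is affected iff its stored lookahead reaches beyond
    // modOffset. Returns the adjusted {relexIndex, relexOffset}.
    class LookaheadBackoffSketch {
        static int[] backOff(int[] tokenLengths, int[] lookaheads,
                int relexIndex, int relexOffset, int modOffset) {
            while (relexIndex > 0
                    && relexOffset + lookaheads[relexIndex - 1] > modOffset) {
                relexIndex--;
                relexOffset -= tokenLengths[relexIndex];
            }
            return new int[] { relexIndex, relexOffset };
        }
    }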

* *

* The algorithm removes the affected tokens in the natural order as they * follow in the token stream. That can be used when the removed tokens * need to be collected (e.g. in an array). - *
+ *
* If the offset and state after token recognition matches * the end offset and state after recognition of the originally present * token then the relexing is stopped because a match was found and the newly * produced tokens would match the present ones. - *
+ *
* Otherwise the token(s) in the list are removed and replaced * by the relexed token and the relexing continues until a match is reached. + *

+ * + *

* When using the token list updater with JoinTokenList.Mutable there is a special treatment
+ * of offsets independent of the underlying JoinTokenListChange and LexerInputOperation.
+ * The updater treats the modOffset as relative (in the number of characters)
+ * to the relexOffset point (which is the real first relexed token's offset; that is necessary
+ * for restarting the lexer input operation), so when going over a JoinToken
+ * the modOffset must be recomputed to not contain the gaps between individual join token parts.
+ *
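An illustrative computation of such a gap-free logical offset, using a hypothetical part model (the real code derives it from PartToken.partTextOffset()):

    // Converts a real input offset into the logical distance from the join
    // token's beginning, skipping the gaps between its parts.
    class JoinOffsetSketch {
        static class Part { int startOffset; int length; }

        static int logicalOffsetInJoinToken(java.util.List<Part> parts, int realOffset) {
            int logical = 0;
            for (Part p : parts) {
                if (realOffset < p.startOffset + p.length) {
                    return logical + Math.max(0, realOffset - p.startOffset);
                }
                logical += p.length; // the inter-part gap contributes nothing
            }
            return logical; // at or beyond the end of the last part
        }
    }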

* * @author Miloslav Metelka * @version 1.00 @@ -112,243 +127,115 @@ private static final Logger LOG = Logger.getLogger(TokenListUpdater.class.getName()); /** - * Use incremental algorithm to update the list of tokens + * Use incremental algorithm to update a regular list of tokens (IncTokenList or EmbeddedTokenList) * after a modification done in the underlying storage. * - * @param tokenList non-null token list that is being updated. It may be top-level list - * or embedded token list. + * @param change non-null change that will incorporate the performed chagnes. * @param modOffset offset where the modification occurred. * @param insertedLength number of characters inserted at modOffset. * @param removedLength number of characters removed at modOffset. - * @param change non-null change that will incorporate the performed chagnes. - * @param zeroIndexRelexState state used for relexing at index 0. */ - public static void update(MutableTokenList tokenList, - int modOffset, int insertedLength, int removedLength, - TokenListChange change, Object zeroIndexRelexState) { - // Fetch offset where the modification occurred - LanguageOperation languageOperation = LexerUtilsConstants.innerLanguageOperation( - tokenList.languagePath()); - - int tokenCount = tokenList.tokenCountCurrent(); // presently created token count - // Now determine which token is the first to be relexed. - // If it would be either modified token or previous-of-modified token - // (for modification right at the begining of modified token) - // then the token will be attempted to be validated (without running - // a lexer). - AbstractToken modToken; - // modTokenOffset holds begining of the token in which the modification occurred. - int modTokenOffset; - // index points to the modified token - int index; - + public static void updateRegular(TokenListChange change, TokenHierarchyEventInfo eventInfo) { + MutableTokenList tokenList = change.tokenList(); + int tokenCount = tokenList.tokenCountCurrent(); boolean loggable = LOG.isLoggable(Level.FINE); if (loggable) { - LOG.log(Level.FINE, "TokenListUpdater.update() STARTED\nmodOffset=" + modOffset - + ", insertedLength=" + insertedLength - + ", removedLength=" + removedLength - + ", tokenCount=" + tokenCount + "\n"); + logModification(tokenList.inputSourceText(), eventInfo, tokenCount, false); + } + + // Find modified token by binary search in existing tokens + // Use LexerUtilsConstants.tokenIndexBinSearch() to NOT lazily create new tokens here + int[] indexAndTokenOffset = LexerUtilsConstants.tokenIndexBinSearch(tokenList, eventInfo.modOffset(), tokenCount); + // Index and offset from which the relexing will start + int relexIndex = indexAndTokenOffset[0]; + // relexOffset points to begining of a token in which the modification occurred + // or which is affected by a modification (its lookahead points beyond modification point). + int relexOffset = indexAndTokenOffset[1]; + if (relexIndex == -1) { // No tokens at all + relexIndex = 0; + relexOffset = tokenList.startOffset(); } - if (tokenCount == 0) { // no tokens yet or all removed - if (!tokenList.isFullyLexed()) { - // No tokens created yet (they get created lazily). 
+ // Index of token before which the relexing will end (or == tokenCount) + int matchIndex = relexIndex; + // Offset of token at matchIndex + int matchOffset = relexOffset; + + if (relexIndex == tokenCount) { // Change right at end of last token or beyond it (if not fully lexed) + // relexOffset set to end offset of the last token + if (!tokenList.isFullyLexed() && eventInfo.modOffset() >= relexOffset + + ((relexIndex > 0) ? tokenList.lookahead(relexIndex - 1) : 0) + ) { // Do nothing if beyond last token's lookahed + // Check whether the last token could be affected at all + // by checking whether the modification was performed + // in the last token's lookahead. + // For fully lexed inputs the characters added to the end + // must be properly lexed and notified (even if the last present + // token has zero lookahead). if (loggable) { - LOG.log(Level.FINE, "TokenListUpdater.update() FINISHED: Not fully lexed yet.\n"); + LOG.log(Level.FINE, "TLU.updateRegular() FINISHED: Not fully lexed yet. rOff=" + + relexOffset + ", mOff=" + eventInfo.modOffset() + "\n"); } - return; // Do nothing in this case - } - // If fully lexed and no tokens then the tokens should start - // right at the modification offset - modToken = null; - modTokenOffset = modOffset; - index = 0; + change.setIndex(relexIndex); + change.setOffset(relexOffset); + change.setMatchIndex(matchIndex); + change.setMatchOffset(matchOffset); + tokenList.replaceTokens(change, eventInfo.diffLength()); + return; // not affected at all + } // change.setIndex() will be performed later in relex() - } else { // at least one token exists - // Check whether the modification at modOffset might affect existing tokens - // Get index of the token in which the modification occurred - // Get the offset of the last token into modTokenOffset variable - index = tokenCount - 1; - modTokenOffset = tokenList.tokenOffset(index); - if (modOffset >= modTokenOffset) { // inside or above the last token? - modToken = token(tokenList, index); - int modTokenEndOffset = modTokenOffset + modToken.length(); - if (modOffset >= modTokenEndOffset) { // above last token - // Modification was right at the end boundary of the last token - // or above it (token list can be created lazily so that is valid case). - // Check whether the last token could be affected at all - // by checking the last token's lookahead. - // For fully lexed inputs the characters added to the end - // must be properly lexed and notified (even if the last present - // token has zero lookahead). - if (!tokenList.isFullyLexed() - && modOffset >= modTokenEndOffset + tokenList.lookahead(index) - ) { - if (loggable) { - LOG.log(Level.FINE, "TokenListUpdater.update() FINISHED: Not fully lexed yet. modTokenOffset=" - + modTokenOffset + ", modToken.length()=" + modToken.length() + "\n"); - } - return; // not affected at all - } + // Leave matchOffset as is (will possibly end relexing at tokenCount and unfinished relexing + // will be continued by replaceTokens()). + // For fully lexed lists it is necessary to lex till the end of input. 
+ if (tokenList.isFullyLexed()) + matchOffset = Integer.MAX_VALUE; - index++; - modToken = null; - modTokenOffset = modTokenEndOffset; - } // else -> modification inside the last token - - } else { // modification in non-last token - // Find modified token by binary search - int low = 0; // use index as 'high' - while (low <= index) { - int mid = (low + index) / 2; - int midStartOffset = tokenList.tokenOffset(mid); - - if (midStartOffset < modOffset) { - low = mid + 1; - } else if (midStartOffset > modOffset) { - index = mid - 1; - } else { - // Token starting exactly at modOffset found - index = mid; - modTokenOffset = midStartOffset; - break; - } + } else { // relexIndex < tokenCount + // Possibly increase matchIndex and matchOffset by skipping the tokens in the removed area + if (eventInfo.removedLength() > 0) { // At least remove token at relexOffset + matchOffset += tokenList.tokenOrEmbeddingUnsync(matchIndex++).token().length(); + int removedEndOffset = eventInfo.modOffset() + eventInfo.removedLength(); + while (matchOffset < removedEndOffset && matchIndex < tokenCount) { + matchOffset += tokenList.tokenOrEmbeddingUnsync(matchIndex++).token().length(); } - if (index < low) { // no token starting right at 'modOffset' - modTokenOffset = tokenList.tokenOffset(index); - } - modToken = token(tokenList, index); - if (loggable) { - LOG.log(Level.FINE, "BIN-SEARCH: index=" + index - + ", modTokenOffset=" + modTokenOffset - + ", modToken.id()=" + modToken.id() + "\n"); + } else { // For inside-token inserts match on the next token + if (matchOffset < eventInfo.modOffset()) { + matchOffset += tokenList.tokenOrEmbeddingUnsync(matchIndex++).token().length(); } } + // Update the matchOffset so that it corresponds to the state + // after the modification + matchOffset += eventInfo.diffLength(); } - // Store the index that points to the modified token - // i.e. modification at its begining or inside. - // Index variable can later be modified but present value is important - // for moving of the offset gap later. - change.setOffsetGapIndex(index); - - // Index and offset from which the relexing will start. - int relexIndex; - int relexOffset; - // Whether the token validation should be attempted or not. 
- boolean attemptValidation = false; - - if (index == 0) { // modToken is first in the list - relexIndex = index; - relexOffset = modTokenOffset; - // Can validate modToken if removal does not span whole token - if (modToken != null && removedLength < modToken.length()) { - attemptValidation = true; + // Check whether modification affected previous token + while (relexIndex > 0 && relexOffset + tokenList.lookahead(relexIndex - 1) > eventInfo.modOffset()) { + relexIndex--; + if (loggable) { + LOG.log(Level.FINE, " Token at rInd=" + relexIndex + " affected (la=" + // NOI18N + tokenList.lookahead(relexIndex) + ") => relex it\n"); // NOI18N } - - } else { // Previous token exists - // Check for insert-only right at the end of the previous token - if (modOffset == modTokenOffset && removedLength == 0) { - index--; // move to previous token - modToken = token(tokenList, index); - modTokenOffset -= modToken.length(); - } - - // Check whether modification affected previous token - if (index == 0 || modTokenOffset + tokenList.lookahead(index - 1) <= modOffset) { - // Modification did not affect previous token - relexIndex = index; - relexOffset = modTokenOffset; - // Check whether modification was localized to modToken only - if (modOffset + removedLength < modTokenOffset + modToken.length()) { - attemptValidation = true; - } - - } else { // at least previous token affected - relexOffset = modTokenOffset - token(tokenList, index - 1).length(); - relexIndex = index - 2; // Start with token below previous token - - // Go back and mark all affected tokens for removals - while (relexIndex >= 0) { - AbstractToken token = token(tokenList, relexIndex); - // Check if token was not affected by modification - if (relexOffset + tokenList.lookahead(relexIndex) <= modOffset) { - break; - } - relexIndex--; - relexOffset -= token.length(); - } - relexIndex++; // Next token will be relexed - } + AbstractToken token = tokenList.tokenOrEmbeddingUnsync(relexIndex).token(); + relexOffset -= token.length(); } - // The lowest offset at which the relexing can end - // (the relexing may end at higher offset if the relexed - // tokens will end at different boundaries than the original - // tokens or if the states after the tokens' recognition - // will differ from the original states in the original tokens. - int matchOffset; - - // Perform token validation of modToken if possible. - // The index variable will hold the token index right before the matching point. - if (attemptValidation) { - matchOffset = modTokenOffset + modToken.length(); - TokenValidator tokenValidator = languageOperation.tokenValidator(modToken.id()); - if (tokenValidator != null && (tokenList.getClass() != IncTokenList.class)) { - -// if (tokenValidator.validateToken(modToken, modOffset - modTokenOffset, modRelOffset, -// removedLength, insertedLength) -// ) { -// // Update positions -// change.initRemovedAddedOffsets() - -// return; // validated successfully -// } - } - - } else { // Validation cannot be attempted - // Need to compute matchOffset and matchIndex - // by iterating forward - if (index < tokenCount) { - matchOffset = modTokenOffset + modToken.length(); - int removeEndOffset = modOffset + removedLength; - while (matchOffset < removeEndOffset && index + 1 < tokenCount) { - index++; - matchOffset += token(tokenList, index).length(); - } - - } else // After last token - matchOffset = modTokenOffset; - } - + // Check whether actual relexing is necessary // State from which the lexer can be started - Object relexState = (relexIndex > 0) ? 
tokenList.state(relexIndex - 1) : zeroIndexRelexState; - // Update the matchOffset so that it corresponds to the state - // after the modification - matchOffset += insertedLength - removedLength; + Object relexState = (relexIndex > 0) ? tokenList.state(relexIndex - 1) : null; + change.setIndex(relexIndex); change.setOffset(relexOffset); + change.setMatchIndex(matchIndex); + change.setMatchOffset(matchOffset); - // Variables' values: - // 'index' - points to modified token. Or index == tokenCount for modification - // past the last token. - // 'tokenCount' - token count in the original token list. - // 'relexIndex' - points to the token that will be relexed as first. - // 'relexOffset' - points to begining of the token that will be relexed as first. - // 'matchOffset' - points to end of token after which the fixed token list could - // possibly match the original token list. Points to end of token at 'index' - // variable if 'index < tokenCount' and to the end of the last token - // if 'index == tokenCount'. - // Check whether relexing is necessary. - // Necessary condition for no-relexing is that the matchToken - // has zero lookahead (if lookahead would be >0 - // then the matchToken would be affected and relexOffset != matchOffset). - // The states before relex token must match the state after the modified token - // In case of removal starting and ending at token boundaries - // the relexing might not be necessary. + // Necessary condition for no-relexing is a removal at token's boundary + // and the token right before modOffset must have zero lookahead (if lookahead would be >0 + // then the token would be affected) and the states before relexIndex must equal + // to the state before matchIndex. boolean relex = (relexOffset != matchOffset) - || index >= tokenCount - || !LexerUtilsConstants.statesEqual(relexState, tokenList.state(index)); + || (eventInfo.insertedLength() > 0) + || (matchIndex == 0) // ensure the tokenList.state(matchIndex - 1) will not fail with IOOBE + || !LexerUtilsConstants.statesEqual(relexState, tokenList.state(matchIndex - 1)); // There is an extra condition that the lookahead of the matchToken // must not span the next (retained) token. This condition helps to ensure @@ -356,192 +243,415 @@ // As the empty tokens are not allowed the situation may only occur // for lookahead > 1. 
int lookahead; - if (!relex && (lookahead = tokenList.lookahead(index)) > 1 && index + 1 < tokenCount) { - relex = (lookahead > token(tokenList, index + 1).length()); // check next token + if (!relex && (lookahead = tokenList.lookahead(matchIndex - 1)) > 1 && matchIndex < tokenCount) { + // Check whether lookahead of the token before match point exceeds the whole token right after match point + relex = (lookahead > tokenList.tokenOrEmbeddingUnsync(matchIndex).token().length()); // check next token } if (loggable) { - LOG.log(Level.FINE, "BEFORE-RELEX: index=" + index + ", modTokenOffset=" + modTokenOffset - + ", relexIndex=" + relexIndex + ", relexOffset=" + relexOffset - + ", relexState=" + relexState - + ", matchOffset=" + matchOffset - + ", perform relex: " + relex + "\n"); + StringBuilder sb = new StringBuilder(200); + sb.append("BEFORE-RELEX: relex=").append(relex); + sb.append(", rInd=").append(relexIndex).append(", rOff=").append(relexOffset); + sb.append(", mInd=").append(matchIndex).append(", mOff=").append(matchOffset).append('\n'); + sb.append(", rSta=").append(relexState).append(", tokenList-part:\n"); + LexerUtilsConstants.appendTokenList(sb, tokenList, matchIndex, matchIndex - 3, matchIndex + 3, false, 4, false); + sb.append('\n'); + LOG.log(Level.FINE, sb.toString()); + } + + assert (relexIndex >= 0); + if (relex) { + // Create lexer input operation for the given token list + LexerInputOperation lexerInputOperation + = tokenList.createLexerInputOperation(relexIndex, relexOffset, relexState); + relex(change, lexerInputOperation, tokenCount); + } + + tokenList.replaceTokens(change, eventInfo.diffLength()); + if (loggable) { + LOG.log(Level.FINE, "TLU.updateRegular() FINISHED: change:" + change + "\nMods:" + change.toStringMods(4)); + } + } + + + /** + * Use incremental algorithm to update a JoinTokenList after a modification done in the underlying storage. + *
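Joining sections effectively exposes several disjoint regions of the input as one continuous character stream to the embedded lexer. A hypothetical CharSequence illustrating the idea (the real implementation joins token lists rather than raw text):

    // Presents the given sections of 'text' as one continuous sequence;
    // starts[i]..ends[i] are the section bounds in ascending order.
    class JoinedCharSequenceSketch implements CharSequence {
        private final CharSequence text;
        private final int[] starts;
        private final int[] ends;
        private final int length;

        JoinedCharSequenceSketch(CharSequence text, int[] starts, int[] ends) {
            this.text = text;
            this.starts = starts;
            this.ends = ends;
            int len = 0;
            for (int i = 0; i < starts.length; i++) {
                len += ends[i] - starts[i];
            }
            this.length = len;
        }

        public int length() { return length; }

        public char charAt(int index) {
            for (int i = 0; i < starts.length; i++) { // linear scan; fine for a sketch
                int sectionLength = ends[i] - starts[i];
                if (index < sectionLength) {
                    return text.charAt(starts[i] + index);
                }
                index -= sectionLength;
            }
            throw new IndexOutOfBoundsException();
        }

        public CharSequence subSequence(int start, int end) {
            StringBuilder sb = new StringBuilder(end - start);
            for (int i = start; i < end; i++) {
                sb.append(charAt(i));
            }
            return sb.toString();
        }
    }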
+     * The assumption is that there may only be two states:
+     * - There is a local input source modification bounded to a particular ETL.
+     *   In such case there should be NO token lists removed/added.
+     * - The modification spans multiple ETLs and all the affected ETLs will be removed.
+     *   The modification is "bounded" by the removed ETLs i.e.
+     *   modOffset >= first-removed-ETL.startOffset()
+     *   and modOffset + removedLength <= last-removed-ETL.endOffset()
+     *
+     * @param change non-null change that will incorporate the performed changes.
+     * @param eventInfo non-null info about the modification's offset and the
+     *  inserted/removed lengths. If the modification is done inside a JoinToken,
+     *  the modification offset must be a logical distance from the token's
+     *  beginning that corresponds to the modification's point (i.e. as if the
+     *  token would be continuous).
+     */
+    public static void updateJoined(JoinTokenListChange change, TokenHierarchyEventInfo eventInfo) {
+        MutableJoinTokenList jtl = (MutableJoinTokenList) change.tokenList();
+        TokenListListUpdate tokenListListUpdate = change.tokenListListUpdate();
+        int tokenCount = jtl.tokenCount();
+        boolean loggable = LOG.isLoggable(Level.FINE);
+        if (loggable) {
+            logModification(jtl.inputSourceText(), eventInfo, tokenCount, true);
+        }
+
+        // First determine what area is affected by removed/added ETLs
+        int relexJoinIndex;
+        int modOffset = eventInfo.modOffset();
+        int relexTokenListIndex = tokenListListUpdate.modTokenListIndex; // Index of ETL where a change occurred.
+        // Relative distance of mod against relex point (or point of ETLs added/removed)
+        int relModOffset;
+        if (tokenListListUpdate.isTokenListsMod()) {
+            // Find relexJoinIndex by examining ETL at relexTokenListIndex-1.
+            // This way the code is more uniform than examining ETL at relexTokenListIndex.
+            if (relexTokenListIndex > 0) { // non-first ETL
+                relexTokenListIndex--;
+                jtl.setActiveTokenListIndex(relexTokenListIndex);
+                EmbeddedTokenList relexEtl = jtl.activeTokenList();
+                EmbeddedJoinInfo joinInfo = relexEtl.joinInfo;
+                relexJoinIndex = jtl.activeEndJoinIndex();
+                if (joinInfo.joinTokenLastPartShift() > 0) { // Mod points inside join token
+                    // Find first non-empty ETL below to determine partTextOffset()
+                    while (relexEtl.tokenCountCurrent() == 0) { // No tokens in ETL
+                        jtl.setActiveTokenListIndex(--relexTokenListIndex);
+                        relexEtl = jtl.activeTokenList();
+                    }
+                    // relexEtl is non-empty - last token is PartToken
+                    PartToken partToken = (PartToken) relexEtl.tokenOrEmbeddingUnsync(
+                            relexEtl.tokenCountCurrent() - 1).token();
+                    relModOffset = partToken.partTextOffset();
+                } else { // Not a join token => use first token at relexTokenListIndex
+                    relexTokenListIndex++;
+                    relModOffset = 0;
+                }
+            } else { // (relexTokenListIndex == 0)
+                relexJoinIndex = 0;
+                jtl.setActiveTokenListIndex(0);
+                relModOffset = 0;
+            }
+
+        } else { // No token list mod
+            assert ((eventInfo.insertedLength() > 0) || (eventInfo.removedLength() > 0)) : "No modification";
+            jtl.setActiveTokenListIndex(relexTokenListIndex);
+            EmbeddedTokenList relexEtl = jtl.activeTokenList();
+            change.charModTokenList = relexEtl;
+            // Search within relexEtl only - can use binary search safely (unlike on JTL with removed ETLs)
+            int[] indexAndTokenOffset = relexEtl.tokenIndex(modOffset); // Index could be -1 TBD
+            relexJoinIndex = relexEtl.joinInfo.joinTokenIndex() + indexAndTokenOffset[0];
+            relModOffset = modOffset - indexAndTokenOffset[1];
+        }
+
+        // Matching point index and offset.
Matching point vars are assigned early + // and relex-vars are possibly shifted down first and then the match-vars are updated. + // That's because otherwise the "working area" of JTL (above/below token list mod) + // would have to be switched below and above. + int matchJoinIndex = relexJoinIndex; + int matchOffset = modOffset - relModOffset; // Suitable for single-ETL update (will be corrected later) + + // Update relex-vars according to lookahead of tokens before relexJoinIndex + while (relexJoinIndex > 0 && jtl.lookahead(relexJoinIndex - 1) > relModOffset) { + AbstractToken relexToken = jtl.tokenOrEmbeddingUnsync(--relexJoinIndex).token(); + relModOffset += relexToken.length(); // User regular token.length() here + if (loggable) { + LOG.log(Level.FINE, " Token at rInd=" + relexJoinIndex + " affected (la=" + // NOI18N + jtl.lookahead(relexJoinIndex) + ") => relex it\n"); // NOI18N + } + } + + // Create lexer input operation now since JTL should be positioned before removed ETLs + // and JLIO needs to scan tokens backwards for fly sequence length. + Object relexState = (relexJoinIndex > 0) ? jtl.state(relexJoinIndex - 1) : null; + int relexLocalIndex = jtl.tokenStartLocalIndex(relexJoinIndex); + relexTokenListIndex = jtl.activeTokenListIndex(); + int relexOffset = jtl.activeTokenList().tokenOffsetByIndex(relexLocalIndex); + JoinLexerInputOperation lexerInputOperation = new MutableJoinLexerInputOperation( + jtl, relexJoinIndex, relexState, relexTokenListIndex, relexOffset, tokenListListUpdate); + lexerInputOperation.init(); + change.setIndex(relexJoinIndex); + change.setOffset(relexOffset); + change.setStartInfo(lexerInputOperation, relexLocalIndex); + // setMatchIndex() and setMatchOffset() called later below + + // Index of token before which the relexing will end (or == tokenCount) + if (tokenListListUpdate.isTokenListsMod()) { // Assign first token after last removed ETL + int afterModTokenListIndex = tokenListListUpdate.modTokenListIndex + tokenListListUpdate.removedTokenListCount; + if (afterModTokenListIndex == jtl.tokenListCount()) { // Removed till end + matchJoinIndex = tokenCount; + matchOffset = Integer.MAX_VALUE; + } else { // Removed inside + EmbeddedTokenList afterModEtl = jtl.tokenList(afterModTokenListIndex); + matchJoinIndex = afterModEtl.joinInfo.joinTokenIndex(); + // Check if the first token of afterModEtl is not an end of join token + // or that the afterModEtl does not participate in a join token (may be empty) + if (afterModEtl.tokenCountCurrent() > 0) { + AbstractToken token = afterModEtl.tokenOrEmbeddingUnsync(0).token(); + if (token.getClass() == PartToken.class) { + matchJoinIndex++; + matchOffset = afterModEtl.startOffset() + token.length(); + } else { + matchOffset = afterModEtl.startOffset(); + } + } else { // No tokens in this ETL + int joinTokenLastPartShift = afterModEtl.joinInfo.joinTokenLastPartShift(); + if (joinTokenLastPartShift > 0) { // Part of join token + afterModTokenListIndex += joinTokenLastPartShift; + matchJoinIndex++; + matchOffset = afterModEtl.startOffset(); + } else { // Empty ETL but not a part of JoinToken - ending empty ETL(s) + matchOffset = afterModEtl.startOffset(); + } + } + // Move jtl past removed/added token lists + jtl.setActiveTokenListIndex(afterModTokenListIndex); + + } + } else { // No token ETLs removed/added + // matchOffset already initialized to (modOffset - orig-relModOffset) + if (eventInfo.removedLength() > 0) { // At least remove token at relexOffset + matchOffset += 
jtl.tokenOrEmbeddingUnsync(matchJoinIndex++).token().length();
+                int removedEndOffset = eventInfo.modOffset() + eventInfo.removedLength();
+                while (matchOffset < removedEndOffset) {
+                    matchOffset += jtl.tokenOrEmbeddingUnsync(matchJoinIndex++).token().length();
+                }
+            } else { // For inside-token inserts match on the next token
+                if (matchOffset < eventInfo.modOffset()) {
+                    matchOffset += jtl.tokenOrEmbeddingUnsync(matchJoinIndex++).token().length();
+                }
+            }
+            // Update the matchOffset so that it corresponds to the state
+            // after the modification
+            matchOffset += eventInfo.diffLength();
+        }
+
+        // TBD relexing necessity optimizations like in updateRegular()
+        change.setMatchIndex(matchJoinIndex);
+        change.setMatchOffset(matchOffset);
+        relex(change, lexerInputOperation, tokenCount);
+        jtl.replaceTokens(change, eventInfo.diffLength());
+        if (loggable) {
+            LOG.log(Level.FINE, "TLU.updateJoined() FINISHED: change:" + change + // NOI18N
+                    "\nMods:" + change.toStringMods(4)); // NOI18N
+        }
+    }
+
+
+    /**
+     * Relex part of the input to create new tokens. This method may sometimes be skipped,
+     * e.g. for removal of chars corresponding to a single token preceded by a token with zero lookahead.
+     *
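Reduced to a standalone model with hypothetical Token/Lexer types, the core of the loop looks as follows: tokens are lexed and added while originals overlapped by them are consumed, until an end offset and lexer state line up with an original token boundary (the real method additionally applies the lookahead criteria described in its body):

    import java.util.ArrayList;
    import java.util.List;

    // Model of the relex loop: consume overlapped original tokens and stop as
    // soon as a newly lexed token ends exactly where an original one ended and
    // the lexer state there equals the original state.
    class RelexSketch {
        static class Token {
            final int length;
            final Object stateAfter;
            Token(int length, Object stateAfter) { this.length = length; this.stateAfter = stateAfter; }
        }
        interface Lexer { Token nextToken(); } // null at end of input

        static List<Token> relex(Lexer lexer, List<Token> orig, int relexOffset,
                int matchIndex, int matchOffset) {
            List<Token> added = new ArrayList<Token>();
            Token token;
            while ((token = lexer.nextToken()) != null) {
                added.add(token);
                relexOffset += token.length;
                while (relexOffset > matchOffset && matchIndex < orig.size()) {
                    matchOffset += orig.get(matchIndex++).length; // consume overlapped original
                }
                Object origState = (matchIndex > 0) ? orig.get(matchIndex - 1).stateAfter : null;
                if (relexOffset == matchOffset && statesEqual(token.stateAfter, origState)) {
                    break; // boundary and state match: the rest of the list can be retained
                }
            }
            return added;
        }

        private static boolean statesEqual(Object s1, Object s2) {
            return (s1 == null) ? (s2 == null) : s1.equals(s2);
        }
    }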
+ * This code is common for both updateRegular() and updateJoined(). + * + * @param tokenList non-null token list that is being updated. It may be top-level list + * or embedded token list. + * @param change token list change into which the created tokens are being added. + * @param tokenCount current token count in tokenList. + */ + private static void relex(TokenListChange change, + LexerInputOperation lexerInputOperation, int tokenCount + ) { + boolean loggable = LOG.isLoggable(Level.FINE); + MutableTokenList tokenList = change.tokenList(); + // Remember the match index below which the comparison of extra relexed tokens + // (matching the original ones) cannot go. + int lowestMatchIndex = change.matchIndex; + + AbstractToken token; + int relexOffset = lexerInputOperation.lastTokenEndOffset(); + while ((token = lexerInputOperation.nextToken()) != null) { + // Get lookahead and state; Will certainly use them both since updater runs for inc token lists only + int lookahead = lexerInputOperation.lookahead(); + Object state = lexerInputOperation.lexerState(); + if (loggable) { + StringBuilder sb = new StringBuilder(100); + sb.append("LEXED-TOKEN: "); + int tokenEndOffset = lexerInputOperation.lastTokenEndOffset(); + CharSequence inputSourceText = tokenList.inputSourceText(); + if (tokenEndOffset > inputSourceText.length()) { + sb.append(tokenEndOffset).append("!! => "); + tokenEndOffset = inputSourceText.length(); + sb.append(tokenEndOffset); + } + sb.append('"'); + CharSequenceUtilities.debugText(sb, inputSourceText.subSequence(relexOffset, tokenEndOffset)); + sb.append('"'); + sb.append(" ").append(token.id()); + sb.append(", <").append(relexOffset); + sb.append(", ").append(relexOffset + token.length()); + sb.append("> LA=").append(lookahead); + sb.append(", state=").append(state); + sb.append(", IHC=").append(System.identityHashCode(token)); + sb.append("\n"); + LOG.log(Level.FINE, sb.toString()); + } + + change.addToken(token, lookahead, state); + // Here add regular token length even for JoinToken instances + // since this is used solely for comparing with matchOffset which + // also uses the per-input-chars coordinates. Real token's offset is independent value + // assigned by the underlying TokenListChange and LexerInputOperation. + relexOffset = lexerInputOperation.lastTokenEndOffset(); + // Marks all original tokens that would cover the area of just lexed token as removed. + // 'matchIndex' will point right above the last token that was removed + // 'matchOffset' will point to the end of the last removed token + if (relexOffset > change.matchOffset) { + do { // Mark all tokens below + if (change.matchIndex == tokenCount) { // index == tokenCount + if (tokenList.isFullyLexed()) { + change.matchOffset = Integer.MAX_VALUE; // Force lexing till end of input + } else { // Not fully lexed -> stop now + // Fake the conditions to break the relexing loop + change.matchOffset = relexOffset; + state = tokenList.state(change.matchIndex - 1); + } + break; + } + // Skip the token at matchIndex and also increase matchOffset + // The default (increasing matchOffset by token.length()) is overriden for join token list. + change.increaseMatchIndex(); + } while (relexOffset > change.matchOffset); + } + + // Check whether the new token ends at matchOffset with the same state + // like the original which typically means end of relexing + if (relexOffset == change.matchOffset + && LexerUtilsConstants.statesEqual(state, + (change.matchIndex > 0) ? 
tokenList.state(change.matchIndex - 1) : null) + ) { + // Here it's a potential match and the relexing could end. + // However there are additional SAME-LOOKAHEAD requirements + // that are checked here and if not satisfied the relexing will continue. + // SimpleLexerRandomTest.test() contains detailed description. + + // If there are no more original tokens to be removed then stop since + // there are no tokens ahead that would possibly have to be relexed because of LA differences. + if (change.matchIndex == tokenCount) + break; + + int matchPointOrigLookahead = (change.matchIndex > 0) + ? tokenList.lookahead(change.matchIndex - 1) + : 0; + // If old and new LAs are the same it should be safe to stop relexing. + // Also since all tokens are non-empty it's enough to just check + // LA > 1 (because LA <= 1 cannot span more than one token). + // The same applies for current LA. + if (lookahead == matchPointOrigLookahead || + matchPointOrigLookahead <= 1 && lookahead <= 1 + ) { + break; + } + + int afterMatchPointTokenLength = tokenList.tokenOrEmbeddingUnsync(change.matchIndex).token().length(); + if (matchPointOrigLookahead <= afterMatchPointTokenLength && + lookahead <= afterMatchPointTokenLength + ) { + // Here both the original and relexed before-match-point token + // have their LAs ending within bounds of the after-match-point token so it's OK break; } - lookahead = lexerInputOperation.lookahead(); - Object state = lexerInputOperation.lexerState(); - if (loggable) { - LOG.log(Level.FINE, "LEXED-TOKEN: id=" + token.id() - + ", length=" + token.length() - + ", lookahead=" + lookahead - + ", state=" + state + "\n"); - } - - change.addToken(token, lookahead, state); - - relexOffset += token.length(); - // Remove obsolete tokens that would cover the area of just lexed token - // 'index' will point to the last token that was removed - // 'matchOffset' will point to the end of the last removed token - if (relexOffset > matchOffset && index < tokenCount) { - attemptValidation = false; - do { - index++; - if (index == tokenCount) { - // Make sure the number of removed tokens will be computed properly later - modToken = null; - // Check whether it should lex till the end - // or whether 'Match at anything' should be done - if (tokenList.isFullyLexed()) { - // Will lex till the end of input - matchOffset = Integer.MAX_VALUE; - } else { - // Force stop lexing - relex = false; - } - break; - } - matchOffset += token(tokenList, index).length(); - } while (relexOffset > matchOffset); + // It's true that nothing can be generally predicted about LA if the token after match point + // would be relexed (compared to the original's token LA). However the following criteria + // should possibly suffice. + int afterMatchPointOrigTokenLookahead = tokenList.lookahead(change.matchIndex); + if (lookahead - afterMatchPointTokenLength <= afterMatchPointOrigTokenLookahead && + (matchPointOrigLookahead <= afterMatchPointTokenLength || + lookahead >= matchPointOrigLookahead) + ) { + // The orig LA of after-match-point token cannot be lower than the currently lexed LA's projection into it. + // Also check that the orig lookahead ended in the after-match-point token + // or otherwise require the relexed before-match-point token to have >= lookahead of the original + // before-match-point token). 
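
Editor's note: the three lookahead tests above are easy to misread, so here is a minimal, self-contained restatement with plain ints. All names are hypothetical and this is only a sketch of the stop conditions, not the module's API (which works through TokenListChange and MutableTokenList, and additionally stops early when no original tokens remain).

    // Editorial sketch: when may relexing stop at a potential match point?
    final class RelexStopConditions {
        /**
         * @param newLA lookahead of the just-relexed before-match-point token.
         * @param origLA original lookahead of the before-match-point token.
         * @param afterLen length of the original token right after the match point.
         * @param afterOrigLA original lookahead of that after-match-point token.
         */
        static boolean canStop(int newLA, int origLA, int afterLen, int afterOrigLA) {
            if (newLA == origLA || (origLA <= 1 && newLA <= 1)) {
                return true; // Same LA, or neither LA can span beyond one (non-empty) token.
            }
            if (origLA <= afterLen && newLA <= afterLen) {
                return true; // Both LAs end inside the after-match-point token.
            }
            // The new LA's projection into the after-match-point token must not exceed
            // that token's original LA, and the original LA must either end inside
            // that token or be covered by the new LA.
            return newLA - afterLen <= afterOrigLA
                    && (origLA <= afterLen || newLA >= origLA);
        }

        public static void main(String[] args) {
            System.out.println(canStop(1, 1, 5, 0)); // true: lookaheads equal
            System.out.println(canStop(7, 2, 5, 3)); // true: projection 7-5=2 <= 3
            System.out.println(canStop(9, 2, 5, 3)); // false: projection 9-5=4 > 3
        }
    }
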
+ break; } - // Check whether the new token ends at matchOffset with the same state - // like the original which typically means end of relexing - if (relexOffset == matchOffset - && (index < tokenCount) - && LexerUtilsConstants.statesEqual(state, tokenList.state(index)) - ) { - // Here it's a potential match and the relexing could end. - // However there are additional conditions that need to be checked. - // 1. Check whether lookahead of the last relexed token - // does not exceed length plus LA of the subsequent (original) token. - // See initial part of SimpleRandomTest.test() verifies this. - // 2. Algorithm attempts to have the same lookaheads in tokens - // like the regular batch scanning would produce. - // Although not strictly necessary requirement - // it helps to simplify the debugging in case the lexer does not work - // well in the incremental setup. - // The following code checks that the lookahead of the original match token - // (i.e. the token right before matchOffset) does "end" inside - // the next token - if not then relexing the next token is done. - // The second part of SimpleRandomTest.test() verifies this. + // The token at matchIndex must be relexed + if (loggable) { + LOG.log(Level.FINE, " EXTRA-RELEX: mInd=" + change.matchIndex + ", LA=" + lookahead + "\n"); + } + // Skip the token at matchIndex + change.increaseMatchIndex(); + // Continue by fetching next token + } + } + lexerInputOperation.release(); - // 'index' points to the last token that was removed - int matchTokenLookahead = tokenList.lookahead(index); - // Optimistically suppose that the relexing will end - relex = false; - // When assuming non-empty tokens the lookahead 1 - // just reaches the end of the next token - // so lookhead < 1 is always fine from this point of view. - if (matchTokenLookahead > 1 || lookahead > 1) { - // Start with token right after the last removed token starting at matchOffset - int i = index + 1; - // Process additional removals by increasing 'index' - // 'lookahead' holds - while (i < tokenCount) { - int tokenLength = token(tokenList, i).length(); - lookahead -= tokenLength; // decrease extra lookahead - matchTokenLookahead -= tokenLength; - if (lookahead <= 0 && matchTokenLookahead <=0) { - break; // No more work - } - if (lookahead != tokenList.lookahead(i) - || matchTokenLookahead > 0 - ) { - // This token must be relexed - if (loggable) { - LOG.log(Level.FINE, "EXTRA-RELEX: index=" + index + ", lookahead=" + lookahead - + ", tokenLength=" + tokenLength + "\n"); - } - index = i; - matchOffset += tokenLength; - relex = true; - // Continue - further tokens may be affected - } - i++; - } - } - - if (!relex) { - if (attemptValidation) { -// if (modToken.id() == token.id() -// && tokenList.lookahead(index) == lookahead -// && !modToken.isFlyweight() -// && !token.isFlyweight() -// && (tokenList.getClass() != IncTokenList.class -// || change.tokenHierarchyOperation().canModifyToken(index, modToken)) -// && LexerSpiTokenPackageAccessor.get().restoreToken( -// languageOperation.tokenHandler(), -// modToken, token) -// ) { -// // Restored successfully -// // TODO implement - fix token's length and return -// // now default in fact to failed validation -// } - attemptValidation = false; - } - } - } - } while (relex); // End of the relexing loop - lexerInputOperation.release(); - - // If at least two tokens were lexed it's possible that e.g. 
the last added token - // will be the same like the last removed token and in such case - // the addition of the last token should be 'undone'. - // This all may happen due to the fact that for larger lookaheads - // the algorithm must relex the token(s) within lookahead (see the code above). - int lastAddedTokenIndex = change.addedTokensOrBranchesCount() - 1; - // There should remain at least one added token since that one - // may not be the same like the original removed one because - // token lengths would differ because of the input source modification. - while (lastAddedTokenIndex >= 1 && index > relexIndex && index < tokenCount) { - AbstractToken addedToken = LexerUtilsConstants.token( - change.addedTokensOrBranches().get(lastAddedTokenIndex)); - AbstractToken removedToken = token(tokenList, index); - if (addedToken.id() != removedToken.id() - || addedToken.length() != removedToken.length() - || change.laState().lookahead(lastAddedTokenIndex) != tokenList.lookahead(index) + // If at least two tokens were lexed it's possible that e.g. the last added token + // will be the same like the last removed token and in such case + // the addition of the last token should be 'undone'. + // This all may happen due to the fact that for larger lookaheads + // the algorithm must relex the token(s) within lookahead (see the code above). + int lastAddedTokenIndex = change.addedTokenOrEmbeddingsCount() - 1; + // There should remain at least one added token since that one + // may not be the same like the original removed one because + // token lengths would differ because of the input source modification. + + if (change.matchOffset != Integer.MAX_VALUE) { // would not make sense when lexing past end of existing tokens + while (lastAddedTokenIndex >= 1 && // At least one token added + change.matchIndex > lowestMatchIndex // At least one token removed + ) { + AbstractToken lastAddedToken = change.addedTokenOrEmbeddings().get(lastAddedTokenIndex).token(); + AbstractToken lastRemovedToken = tokenList.tokenOrEmbeddingUnsync(change.matchIndex - 1).token(); + if (lastAddedToken.id() != lastRemovedToken.id() + || lastAddedToken.length() != lastRemovedToken.length() + || change.laState().lookahead(lastAddedTokenIndex) != tokenList.lookahead(change.matchIndex - 1) || !LexerUtilsConstants.statesEqual(change.laState().state(lastAddedTokenIndex), - tokenList.state(index)) + tokenList.state(change.matchIndex - 1)) ) { break; } // Last removed and added tokens are the same so undo the addition if (loggable) { - LOG.log(Level.FINE, "RETAIN-ORIGINAL: index=" + index + ", id=" + removedToken.id() + "\n"); + LOG.log(Level.FINE, " RETAIN-ORIGINAL at (mInd-1)=" + (change.matchIndex-1) + + ", id=" + lastRemovedToken.id() + "\n"); } lastAddedTokenIndex--; - index--; - relexOffset -= addedToken.length(); - change.removeLastAddedToken(); + change.removeLastAddedToken(); // Includes decreasing of matchIndex and matchOffset } + } else { // matchOffset == Integer.MAX_VALUE + // Fix matchOffset to point to end of last token since it's used + // as last-added-token-end-offset in event notifications + change.setMatchOffset(relexOffset); } + } - // Now ensure that the original tokens will be replaced by the relexed ones. - int removedTokenCount = (modToken != null) ? 
(index - relexIndex + 1) : (index - relexIndex); - if (loggable) { - LOG.log(Level.FINE, "TokenListUpdater.update() FINISHED: Removed:" - + removedTokenCount + ", Added:" + change.addedTokensOrBranchesCount() + " tokens.\n"); + private static void logModification(CharSequence inputSourceText, TokenHierarchyEventInfo eventInfo, + int tokenCount, boolean updateJoined + ) { + int modOffset = eventInfo.modOffset(); + int removedLength = eventInfo.removedLength(); + int insertedLength = eventInfo.insertedLength(); + String insertedText = ""; + if (insertedLength > 0) { + insertedText = ", insTxt:\"" + CharSequenceUtilities.debugText( + inputSourceText.subSequence(modOffset, modOffset + insertedLength)) + '"'; } - change.setIndex(relexIndex); - change.setAddedEndOffset(relexOffset); - tokenList.replaceTokens(change, removedTokenCount, insertedLength - removedLength); - } - - private static AbstractToken token(MutableTokenList tokenList, int index) { - Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainerUnsync(index); // Unsync impl suffices - return LexerUtilsConstants.token(tokenOrEmbeddingContainer); + // Debug 10 chars around modOffset + int afterInsertOffset = modOffset + insertedLength; + CharSequence beforeText = inputSourceText.subSequence(Math.max(afterInsertOffset - 5, 0), afterInsertOffset); + CharSequence afterText = inputSourceText.subSequence(afterInsertOffset, + Math.min(afterInsertOffset + 5, inputSourceText.length())); + StringBuilder sb = new StringBuilder(200); + sb.append("TLU.update"); + sb.append(updateJoined ? "Joined" : "Regular"); + sb.append("() modOff=").append(modOffset); + sb.append(", text-around:\"").append(beforeText).append('|'); + sb.append(afterText).append("\", insLen="); + sb.append(insertedLength).append(insertedText); + sb.append(", remLen=").append(removedLength); + sb.append(", tCnt=").append(tokenCount).append('\n'); + LOG.log(Level.FINE, sb.toString()); } } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/AbstractToken.java --- a/lexer/src/org/netbeans/lib/lexer/token/AbstractToken.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/token/AbstractToken.java Wed May 28 14:48:55 2008 +0200 @@ -41,30 +41,49 @@ package org.netbeans.lib.lexer.token; +import org.netbeans.lib.lexer.TokenOrEmbedding; +import java.util.List; import org.netbeans.api.lexer.PartType; import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenHierarchy; import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.editor.util.CharSequenceUtilities; import org.netbeans.lib.lexer.EmbeddedTokenList; -import org.netbeans.lib.lexer.LexerApiPackageAccessor; -import org.netbeans.lib.lexer.LexerUtilsConstants; +import org.netbeans.lib.lexer.EmbeddingContainer; import org.netbeans.lib.lexer.TokenList; /** * Abstract token is base class of all token implementations used in the lexer module. + *
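
Editor's note: from the logModification() code above, an edit that inserts "abc" at offset 10 (no removal, 25 tokens in the list, post-insert text reading "...helloabcworld...") would be logged at FINE level roughly as the following single line (values illustrative):

    TLU.updateRegular() modOff=10, text-around:"loabc|world", insLen=3, insTxt:"abc", remLen=0, tCnt=25
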
+ * Two descendants of AbstractToken:
+ * <ul>
+ *   <li> {@link DefaultToken} - by default does not contain a text but points
+ *     into a text storage of its token list instead. It may however cache
+ *     its text as string in itself.
+ *   </li>
+ *   <li> {@link TextToken} - contains text that it represents; may act as flyweight token.
+ *     {@link CustomTextToken} allows a token to have a custom text independent
+ *     of text of an actual storage.
+ *   </li>
+ * </ul>
+ *
 *
 * @author Miloslav Metelka
 * @version 1.00
 */
-public abstract class AbstractToken<T extends TokenId> extends Token<T> implements CharSequence {
+public abstract class AbstractToken<T extends TokenId> extends Token<T>
+implements TokenOrEmbedding<T> {

     private final T id; // 12 bytes (8-super + 4)

-    private TokenList<T> tokenList; // 16 bytes
+    protected TokenList<T> tokenList; // 16 bytes

-    private int rawOffset; // 20 bytes
+    protected int rawOffset; // 20 bytes

     /**
      * @param id non-null token id.
@@ -80,8 +99,6 @@
         this.rawOffset = rawOffset;
     }

-    public abstract int length();
-
     /**
      * Get identification of this token.
      *
@@ -90,22 +107,6 @@
     @Override
     public final T id() {
         return id;
-    }
-
-    /**
-     * Get text represented by this token.
-     */
-    @Override
-    public CharSequence text() {
-        if (tokenList != null) {
-            if (tokenList.getClass() == EmbeddedTokenList.class) {
-                EmbeddedTokenList etl = (EmbeddedTokenList)tokenList;
-                return etl.embeddingContainer().updateStatus() ? this : null;
-            }
-            return this;
-        } else {
-            return null;
-        }
     }

     /**
@@ -160,14 +161,13 @@
     }

     @Override
-    public final int offset(TokenHierarchy<?> tokenHierarchy) {
-        if (rawOffset == -1) { // flyweight token
-            return -1;
+    public int offset(TokenHierarchy<?> tokenHierarchy) {
+        if (tokenList != null) {
+            if (tokenList.getClass() == EmbeddedTokenList.class) // Sync status first
+                ((EmbeddedTokenList)tokenList).embeddingContainer().updateStatus();
+            return tokenList.tokenOffset(this);
         }
-
-        return (tokenList != null)
-                ? tokenList.childTokenOffset(rawOffset)
-                : rawOffset;
+        return rawOffset; // Covers the case of flyweight token that will return -1
//        if (tokenHierarchy != null) {
//            return LexerApiPackageAccessor.get().tokenHierarchyOperation(
//                    tokenHierarchy).tokenOffset(this, tokenList, rawOffset);
@@ -177,7 +177,7 @@
//                    : rawOffset;
//        }
     }
-
+
     @Override
     public boolean hasProperties() {
         return false;
@@ -188,40 +188,38 @@
         return null;
     }

-    // CharSequence methods
-    /**
-     * Implementation of CharSequence.charAt()
-     */
-    public final char charAt(int index) {
-        if (index < 0 || index >= length()) {
-            throw new IndexOutOfBoundsException(
-                "index=" + index + ", length=" + length() // NOI18N
-            );
-        }
-        if (tokenList == null) { // Should normally not happen
-            // A bit strange to throw IOOBE but it's more practical since
-            // TokenHierarchy's dump can overcome IOOBE and deliver a useful debug but not NPEs etc.
-            throw new IndexOutOfBoundsException("index=" + index + ", length=" + length() +
-                    " but tokenList==null for token " + dumpInfo(null));
-        }
-        return tokenList.childTokenCharAt(rawOffset, index);
+    @Override
+    public Token<T> joinToken() {
+        return null;
     }

-    public final CharSequence subSequence(int start, int end) {
-        return CharSequenceUtilities.toString(this, start, end);
+    @Override
+    public List<PartToken<T>> joinedParts() {
+        return null;
+    }
+
+    // Implements TokenOrEmbedding
+    public final AbstractToken<T> token() {
+        return this;
    }

-    /**
-     * This method is in fact CharSequence.toString() implementation.
-     */
+    // Implements TokenOrEmbedding
+    public final EmbeddingContainer<T> embedding() {
+        return null;
+    }
+
    @Override
-    public String toString() {
-        // To prevent NPEs when token.toString() would called without checking
-        // (text() == null) there is an extra check for that.
-        CharSequence text = text();
-        return (text != null)
-                ?
CharSequenceUtilities.toString(this, 0, length()) - : ""; + public boolean isRemoved() { + if (tokenList != null) { + if (tokenList.getClass() == EmbeddedTokenList.class) + ((EmbeddedTokenList)tokenList).embeddingContainer().updateStatus(); + return tokenList.isRemoved(); + } + return !isFlyweight(); + } + + public String dumpInfo() { + return dumpInfo(null, null, true, 0).toString(); } /** @@ -234,34 +232,40 @@ * * @param tokenHierarchy null should be passed * (the parameter is reserved for future use when token hierarchy snapshots will be implemented). + * @param dumpText whether text should be dumped (not for TokenListUpdater + * when text is already shifted). * @return dump of the thorough token information. If token's text is longer * than 400 characters it will be shortened. */ - public String dumpInfo(TokenHierarchy tokenHierarchy) { - StringBuilder sb = new StringBuilder(); - CharSequence text = text(); - if (text != null) { - sb.append('"'); - int textLength = text.length(); - for (int i = 0; i < textLength; i++) { - if (textLength > 400 && i >= 200 && i < textLength - 200) { - i = textLength - 200; - sb.append(" ...... "); // NOI18N - continue; + public StringBuilder dumpInfo(StringBuilder sb, TokenHierarchy tokenHierarchy, boolean dumpTokenText, int indent) { + if (sb == null) { + sb = new StringBuilder(50); + } + if (dumpTokenText) { + CharSequence text = text(); + if (text != null) { + sb.append('"'); + int textLength = text.length(); + for (int i = 0; i < textLength; i++) { + if (textLength > 400 && i >= 200 && i < textLength - 200) { + i = textLength - 200; + sb.append(" ...... "); // NOI18N + continue; + } + try { + CharSequenceUtilities.debugChar(sb, text.charAt(i)); + } catch (IndexOutOfBoundsException e) { + // For debugging purposes it's better than to completely fail + sb.append("IOOBE at index=").append(i).append("!!!"); // NOI18N + break; + } } - try { - CharSequenceUtilities.debugChar(sb, text.charAt(i)); - } catch (IndexOutOfBoundsException e) { - // For debugging purposes it's better than to completely fail - sb.append("IOOBE at index=").append(i).append("!!!"); // NOI18N - break; - } + sb.append('"'); + } else { + sb.append(""); // NOI18N } - sb.append('"'); - } else { - sb.append(""); // NOI18N + sb.append(' '); } - sb.append(' '); if (isFlyweight()) { sb.append("F(").append(length()).append(')'); } else { @@ -271,7 +275,7 @@ } sb.append(' ').append(id != null ? id.name() + '[' + id.ordinal() + ']' : ""); // NOI18N sb.append(" ").append(dumpInfoTokenType()); - return sb.toString(); + return sb; } protected String dumpInfoTokenType() { diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/ComplexToken.java --- a/lexer/src/org/netbeans/lib/lexer/token/ComplexToken.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. 
When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ - -package org.netbeans.lib.lexer.token; - -import org.netbeans.api.lexer.PartType; -import org.netbeans.api.lexer.TokenId; -import org.netbeans.spi.lexer.TokenPropertyProvider; - -/** - * Token that may hold custom text and also additional properties. - * - * @author Miloslav Metelka - * @version 1.00 - */ - -public final class ComplexToken extends CustomTextToken { - - private final TokenPropertyProvider propertyProvider; // 36 bytes - - public ComplexToken(T id, int length, CharSequence customText, PartType partType, - TokenPropertyProvider propertyProvider) { - super(id, length, customText, partType); - this.propertyProvider = propertyProvider; - } - - @Override - public boolean hasProperties() { - return (propertyProvider != null); - } - - @Override - public Object getProperty(Object key) { - return (propertyProvider != null) ? propertyProvider.getValue(this, key) : null; - } - - @Override - protected String dumpInfoTokenType() { - return "ComT"; // NOI18N "ComplexToken" - } - -} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/CustomTextToken.java --- a/lexer/src/org/netbeans/lib/lexer/token/CustomTextToken.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/token/CustomTextToken.java Wed May 28 14:48:55 2008 +0200 @@ -41,12 +41,12 @@ package org.netbeans.lib.lexer.token; -import org.netbeans.api.lexer.PartType; import org.netbeans.api.lexer.TokenId; /** * Token with a custom text and the token length likely different - * from text's length. + * from text's length. It can be used to shrink size of the input chars + * being referenced from skim token list by referencing some fixed characters. *
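
Editor's note on the reworked CustomTextToken below: its reported length is now decoupled from its display text. An editorial illustration (hypothetical id and values, not code from the patch):

    // CustomTextToken<MyId> t = new CustomTextToken<MyId>(id, "...", 500);
    // t.length() == 500; t.text().length() == 3; t.isCustomText() == true
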
    * Token with the custom text cannot be branched by a language embedding. * @@ -54,27 +54,28 @@ * @version 1.00 */ -public class CustomTextToken extends DefaultToken { +public class CustomTextToken extends TextToken { - private final CharSequence text; // 28 bytes (24-super + 4) - - private final PartType partType; // 32 bytes + private final int length; // 28 bytes (24-super + 4) /** * @param id non-null identification of the token. + * @param text non-null text of the token. * @param length length of the token. - * @param text non-null text of the token. */ - public CustomTextToken(T id, int length, CharSequence text, PartType partType) { - super(id, length); - assert (text != null); - this.text = text; - this.partType = partType; + public CustomTextToken(T id, CharSequence text, int length) { + super(id, text); + this.length = length; } @Override - public final CharSequence text() { - return text; + public boolean isCustomText() { + return true; + } + + @Override + public final int length() { + return length; } @Override diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/DefaultToken.java --- a/lexer/src/org/netbeans/lib/lexer/token/DefaultToken.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/token/DefaultToken.java Wed May 28 14:48:55 2008 +0200 @@ -42,6 +42,8 @@ package org.netbeans.lib.lexer.token; import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.editor.util.CharSequenceUtilities; +import org.netbeans.lib.lexer.LexerUtilsConstants; /** * Default token which by default obtains text from its background storage. @@ -61,7 +63,18 @@ public class DefaultToken extends AbstractToken implements CharSequence { - private final int length; // 24 bytes (20-super + 4) + /** + * Used in Token.text() to decide whether "this" should be returned and + * a text.charAt() will be slower or, for larger tokens, + * a subsequence of input source text should be created and returned instead.. + */ + private static final int INPUT_SOURCE_SUBSEQUENCE_THRESHOLD = 30; + + /** + * Field that is of type CharSequence and is either length of the token + * or cached text of the token as String. + */ + private CharSequence tokenLengthOrCachedText; // 24 bytes (20-super + 4) /** * Construct new default token. @@ -69,7 +82,7 @@ public DefaultToken(T id, int length) { super(id); assert (length > 0) : "Token length=" + length + " <= 0"; // NOI18N - this.length = length; + this.tokenLengthOrCachedText = TokenLength.get(length); } /** @@ -77,12 +90,12 @@ */ public DefaultToken(T id) { super(id); - this.length = 0; + this.tokenLengthOrCachedText = TokenLength.get(0); } @Override - public final int length() { - return length; + public int length() { + return tokenLengthOrCachedText.length(); } @Override @@ -90,4 +103,192 @@ return "DefT"; // NOI18N "TextToken" or "FlyToken" } + /** + * Get text represented by this token. 
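
Editor's note: a minimal sketch of the size-based choice that DefaultToken.text() makes below. The names and the CharBuffer stand-in are editorial; the real method returns either the token itself or an InputSourceSubsequence over the token list's input text, and resolves offsets through the token list.

    // Editorial sketch: small tokens serve as their own CharSequence (each
    // charAt() resolves back into the input text), large tokens get a fixed
    // window so repeated access does not re-resolve the token offset.
    final class TokenTextChoice {
        static final int SUBSEQUENCE_THRESHOLD = 30; // mirrors INPUT_SOURCE_SUBSEQUENCE_THRESHOLD

        static CharSequence text(CharSequence inputText, int offset, int length, boolean removed) {
            if (removed) {
                return null; // A removed token no longer has a text.
            }
            if (length >= SUBSEQUENCE_THRESHOLD) {
                // Stand-in for InputSourceSubsequence: a fixed window over the input.
                return java.nio.CharBuffer.wrap(inputText, offset, offset + length);
            }
            // Stand-in for "return this": resolve against the input text per access.
            return inputText.subSequence(offset, offset + length);
        }
    }
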
+ */ + @Override + public CharSequence text() { + CharSequence text; + if (tokenLengthOrCachedText.getClass() == TokenLength.class) { + if (!isRemoved()) { // Updates status for EmbeddedTokenList; tokenList != null + int len = tokenLengthOrCachedText.length(); + if (len >= INPUT_SOURCE_SUBSEQUENCE_THRESHOLD) { + // Create subsequence of input source text + CharSequence inputSourceText = tokenList.inputSourceText(); + int tokenOffset = tokenList.tokenOffset(this); + text = new InputSourceSubsequence(this, inputSourceText, + tokenOffset, tokenOffset + len, tokenOffset, tokenOffset + len); + } else { // Small token + text = this; + } + } else { // Token is removed + text = null; + } + } else { // tokenLength contains cached text + text = tokenLengthOrCachedText; + } + return text; + } + + /** + * Implementation of CharSequence.charAt() + * for case when this token is used as token's text char sequence. + */ + public final char charAt(int index) { + if (index < 0 || index >= length()) { + throw new IndexOutOfBoundsException( + "index=" + index + ", length=" + length() // NOI18N + ); + } + if (tokenList == null) { // Should normally not happen + // A bit strange to throw IOOBE but it's more practical since + // TokenHierarchy's dump can overcome IOOBE and deliver a useful debug but not NPEs etc. + throw new IndexOutOfBoundsException("index=" + index + ", length=" + length() + + " but tokenList==null for token " + dumpInfo(null, null, false, 0)); + } + int tokenOffset = tokenList.tokenOffset(this); + return tokenList.inputSourceText().charAt(tokenOffset + index); + } + + /** + * Implementation of CharSequence.subSequence() + * for case when this token is used as token's text char sequence. + */ + public final CharSequence subSequence(int start, int end) { + // Create subsequence of token's text + CharSequence text; + int textLength = tokenLengthOrCachedText.length(); + CharSequenceUtilities.checkIndexesValid(start, end, textLength); + + if (tokenLengthOrCachedText.getClass() == TokenLength.class) { + // If calling this.subSequence() then this.text() was already called + // so the status should be updated already and also the token is not removed. + // For simplicity always make a subsequence of the input source text. + CharSequence inputSourceText = tokenList.inputSourceText(); + int tokenOffset = tokenList.tokenOffset(this); + text = new InputSourceSubsequence(this, inputSourceText, + tokenOffset + start, tokenOffset + end, tokenOffset, tokenOffset + textLength); + + } else { // tokenLength contains cached text + text = tokenLengthOrCachedText.subSequence(start, end); + } + return text; + } + + /** + * Implementation of CharSequence.toString() + * for case when this token is used as token's text char sequence. + */ + @Override + public String toString() { + // In reality this method can either be called as result of calling Token.text().toString() + // or just calling Token.toString() for debugging purposes + String textStr; + if (tokenLengthOrCachedText.getClass() == TokenLength.class) { + if (!isRemoved()) { // Updates status for EmbeddedTokenList; tokenList != null + TokenLength tokenLength = (TokenLength) tokenLengthOrCachedText; + CharSequence inputSourceText = tokenList.inputSourceText(); + int nextCacheFactor = tokenLength.nextCacheFactor(); + int threshold = (inputSourceText.getClass() == String.class) + ? 
LexerUtilsConstants.INPUT_TEXT_STRING_THRESHOLD + : LexerUtilsConstants.CACHE_TOKEN_TO_STRING_THRESHOLD; + int tokenOffset = tokenList.tokenOffset(this); + textStr = inputSourceText.subSequence(tokenOffset, + tokenOffset + tokenLength.length()).toString(); + if (nextCacheFactor < threshold) { + tokenLengthOrCachedText = tokenLength.next(nextCacheFactor); + } else { // Should become cached + tokenLengthOrCachedText = textStr; + } + setTokenLengthOrCachedText(tokenLengthOrCachedText); + } else { // Token already removed + textStr = ""; + } + + } else { // tokenLength contains cached text + textStr = tokenLengthOrCachedText.toString(); + } + return textStr; + } + + synchronized CharSequence tokenLengthOrCachedText() { + return tokenLengthOrCachedText; + } + + synchronized void setTokenLengthOrCachedText(CharSequence tokenLengthOrCachedText) { + this.tokenLengthOrCachedText = tokenLengthOrCachedText; + } + + + private static final class InputSourceSubsequence implements CharSequence { + + private final DefaultToken token; // (8-super + 4) = 12 bytes + + private final CharSequence inputSourceText; // 16 bytes + + private final int start; // 20 bytes + + private final int end; // 24 bytes + + private final int tokenStart; // 28 bytes + + private final int tokenEnd; // 32 bytes + + public InputSourceSubsequence(DefaultToken token, CharSequence text, + int start, int end, int tokenStart, int tokenEnd + ) { + this.token = token; + this.inputSourceText = text; + this.start = start; + this.end = end; + this.tokenStart = tokenStart; + this.tokenEnd = tokenEnd; + } + + public int length() { + return end - start; + } + + public char charAt(int index) { + CharSequenceUtilities.checkIndexValid(index, length()); + return inputSourceText.charAt(start + index); + } + + public CharSequence subSequence(int start, int end) { + CharSequenceUtilities.checkIndexesValid(this, start, end); + return new InputSourceSubsequence(token, inputSourceText, + this.start + start, this.start + end, tokenStart, tokenEnd); + } + + @Override + public String toString() { + String textStr; + // Increase usage + CharSequence tokenLengthOrCachedText = token.tokenLengthOrCachedText(); + if (tokenLengthOrCachedText.getClass() == TokenLength.class) { + TokenLength tokenLength = (TokenLength) tokenLengthOrCachedText; + int nextCacheFactor = tokenLength.nextCacheFactor(); + int threshold = (inputSourceText.getClass() == String.class) + ? 
LexerUtilsConstants.INPUT_TEXT_STRING_THRESHOLD + : LexerUtilsConstants.CACHE_TOKEN_TO_STRING_THRESHOLD; + if (nextCacheFactor < threshold) { + textStr = inputSourceText.subSequence(start, end).toString(); + tokenLengthOrCachedText = tokenLength.next(nextCacheFactor); + } else { // Should become cached + // Create cached text + String tokenTextString = inputSourceText.subSequence(tokenStart, tokenEnd).toString(); + tokenLengthOrCachedText = tokenTextString; + // Substring returns this for start == 0 && end == length() + textStr = tokenTextString.substring(start - tokenStart, end - tokenStart); + } + token.setTokenLengthOrCachedText(tokenLengthOrCachedText); + + } else { // Already cached text + textStr = tokenLengthOrCachedText.subSequence(start - tokenStart, end - tokenStart).toString(); + } + return textStr; + } + + } + } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/JoinToken.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/token/JoinToken.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,136 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. 
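
Editor's note: the caching decision used in both toString() implementations above boils down to the rule below; the parameter names mirror LexerUtilsConstants but the concrete values are not shown in this patch.

    // Editorial sketch: every uncached toString() raises the cache factor by
    // (token length + a fixed creation cost); once the projected factor reaches
    // the threshold, the String is cached on the token instead of being rebuilt.
    final class CacheRule {
        static boolean shouldCacheNow(int cacheFactor, int tokenLength,
                int creationFactor, int threshold) {
            int nextCacheFactor = cacheFactor + tokenLength + creationFactor;
            return nextCacheFactor >= threshold;
        }
    }
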
+ */ + +package org.netbeans.lib.lexer.token; + +import java.util.List; +import org.netbeans.api.lexer.PartType; +import org.netbeans.api.lexer.TokenHierarchy; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.editor.util.ArrayUtilities; +import org.netbeans.spi.lexer.TokenPropertyProvider; + +/** + * Token consisting of multiple parts. + * + * @author Miloslav Metelka + * @version 1.00 + */ + +public final class JoinToken extends PropertyToken { + + private List> joinedParts; // 32 bytes (28-super + 4) + + private int completeLength; // 36 bytes + + /** + * Number of ETLs spanned including empty ETLs except a first part. + */ + private int extraTokenListSpanCount; // 40 bytes + + public JoinToken(T id, int length, TokenPropertyProvider propertyProvider, PartType partType) { + super(id, length, propertyProvider, partType); + } + + @Override + public List> joinedParts() { + return joinedParts; + } + + public void setJoinedParts(List> joinedParts, int extraTokenListSpanCount) { + assert (joinedParts != null) : "joinedParts expected to be non-null"; + this.joinedParts = joinedParts; + for (PartToken partToken : joinedParts) { + completeLength += partToken.length(); + } + this.extraTokenListSpanCount = extraTokenListSpanCount; + } + + public PartToken lastPart() { + return joinedParts.get(joinedParts.size() - 1); + } + + public int extraTokenListSpanCount() { + return extraTokenListSpanCount; + } + + @Override + public int offset(TokenHierarchy tokenHierarchy) { + return joinedParts.get(0).offset(tokenHierarchy); + } + + @Override + public int length() { + return completeLength; + } + + @Override + public CharSequence text() { + return new JoinTokenText(joinedParts, completeLength); + } + + @Override + public boolean isRemoved() { + // Check whether last part of token is removed - this needs to be improved + // for the case when token is just partially recreated. + return lastPart().isRemoved(); + } + + @Override + public StringBuilder dumpInfo(StringBuilder sb, TokenHierarchy tokenHierarchy, boolean dumpTokenText, int indent) { + super.dumpInfo(sb, tokenHierarchy, dumpTokenText, indent); + sb.append(", ").append(joinedParts.size()).append(" parts"); + int digitCount = String.valueOf(joinedParts.size() - 1).length(); + for (int i = 0; i < joinedParts.size(); i++) { + sb.append('\n'); + ArrayUtilities.appendSpaces(sb, indent + 2); + ArrayUtilities.appendBracketedIndex(sb, i, digitCount); + joinedParts.get(i).dumpInfo(sb, tokenHierarchy, dumpTokenText, indent + 4); + } + return sb; + } + + @Override + protected String dumpInfoTokenType() { + return "JoiT"; // NOI18N + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/JoinTokenText.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/token/JoinTokenText.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,130 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. 
When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.token; + +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.editor.util.CharSequenceUtilities; + +/** + * Char sequence over join token parts. + * + * @author Miloslav Metelka + */ + +public final class JoinTokenText implements CharSequence { + + private static final Logger LOG = Logger.getLogger(JoinTokenText.class.getName()); + + private List> joinedParts; + + private int activePartIndex; + + private CharSequence activeInputText; + + private int activeStartCharIndex; + + private int activeEndCharIndex; + + private int length; + + public JoinTokenText(List> joinedParts, int length) { + this.joinedParts = joinedParts; + this.activeInputText = joinedParts.get(0).text(); + // Implicit: this.activeStartCharIndex = 0; + this.activeEndCharIndex = activeInputText.length(); + this.length = length; + } + + public synchronized char charAt(int index) { + if (index < activeStartCharIndex) { // Find non-empty previous + if (index < 0) + throw new IndexOutOfBoundsException("index=" + index + " < 0"); + do { + activePartIndex--; + if (activePartIndex < 0) { // Should never happen + LOG.log(Level.WARNING, "Internal error: index=" + index + ", " + dumpState()); + } + activeInputText = joinedParts.get(activePartIndex).text(); + int len = activeInputText.length(); + activeEndCharIndex = activeStartCharIndex; + activeStartCharIndex -= len; + } while (index < activeStartCharIndex); + } else if (index >= activeEndCharIndex) { // Find non-empty next + if (index >= length) + throw new IndexOutOfBoundsException("index=" + index + " >= length()=" + length); + do { + activePartIndex++; + activeInputText = joinedParts.get(activePartIndex).text(); + int len = activeInputText.length(); + activeStartCharIndex = activeEndCharIndex; + activeEndCharIndex += len; + } while (index >= activeEndCharIndex); + } + + // Valid char within current segment + return activeInputText.charAt(index - activeStartCharIndex); + } + + public int 
length() { + return length; + } + + public CharSequence subSequence(int start, int end) { + return CharSequenceUtilities.toString(this, start, end); + } + + @Override + public synchronized String toString() { + return CharSequenceUtilities.toString(this); + } + + private String dumpState() { + return "activeTokenListIndex=" + activePartIndex + + ", activeStartCharIndex=" + activeStartCharIndex + + ", activeEndCharIndex=" + activeEndCharIndex + + ", length=" + length; + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/PartToken.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/token/PartToken.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,103 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.token; + +import org.netbeans.api.lexer.PartType; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.lib.lexer.TokenOrEmbedding; +import org.netbeans.spi.lexer.TokenPropertyProvider; + +/** + * Part of a {@link JoinToken}. 
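
Editor's note: JoinTokenText.charAt() above walks between parts by sliding a cached "active" segment. The same idea over plain strings, as a self-contained editorial sketch with hypothetical names:

    import java.util.List;

    // Editorial sketch of the sliding-segment lookup used by JoinTokenText.
    final class JoinedText implements CharSequence {
        private final List<String> parts;
        private final int length;
        private int activeIndex; // part whose range is currently cached
        private int activeStart; // overall char index where that part starts
        private int activeEnd;   // overall char index where that part ends

        JoinedText(List<String> parts) {
            this.parts = parts;
            this.activeEnd = parts.get(0).length();
            int len = 0;
            for (String p : parts) {
                len += p.length();
            }
            this.length = len;
        }

        public int length() {
            return length;
        }

        public synchronized char charAt(int index) {
            if (index < 0 || index >= length) {
                throw new IndexOutOfBoundsException("index=" + index + ", length=" + length);
            }
            while (index < activeStart) { // slide back, skipping empty parts
                activeIndex--;
                activeEnd = activeStart;
                activeStart -= parts.get(activeIndex).length();
            }
            while (index >= activeEnd) { // slide forward, skipping empty parts
                activeIndex++;
                activeStart = activeEnd;
                activeEnd += parts.get(activeIndex).length();
            }
            return parts.get(activeIndex).charAt(index - activeStart);
        }

        public CharSequence subSequence(int start, int end) {
            StringBuilder sb = new StringBuilder(Math.max(end - start, 0));
            for (int i = start; i < end; i++) {
                sb.append(charAt(i));
            }
            return sb.toString();
        }
    }

For example, charAt(3) on the parts ["if", "", "(x)"] slides forward past the empty part and returns 'x'.
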
+ * + * @author Miloslav Metelka + * @version 1.00 + */ + +public final class PartToken extends PropertyToken { + + private TokenOrEmbedding joinTokenOrEmbedding; // 32 bytes (28-super + 4) + + private int partTokenIndex; // Index of this part inside + + private int partTextOffset; // Offset of this part's text among all parts that comprise the complete token + + public PartToken(T id, int length, TokenPropertyProvider propertyProvider, PartType partType, + TokenOrEmbedding joinToken, int partTokenIndex, int partTextOffset + ) { + super(id, length, propertyProvider, partType); + setJoinTokenOrEmbedding(joinToken); + this.partTokenIndex = partTokenIndex; + this.partTextOffset = partTextOffset; + } + + @Override + public JoinToken joinToken() { + return (JoinToken)joinTokenOrEmbedding.token(); + } + + public boolean isLastPart() { + return (joinToken().lastPart() == this); + } + + public TokenOrEmbedding joinTokenOrEmbedding() { + return joinTokenOrEmbedding; + } + + public void setJoinTokenOrEmbedding(TokenOrEmbedding joinTokenOrEmbedding) { + this.joinTokenOrEmbedding = joinTokenOrEmbedding; + } + + public int partTokenIndex() { + return partTokenIndex; + } + + public int partTextOffset() { + return partTextOffset; + } + + @Override + protected String dumpInfoTokenType() { + return "ParT[" + partTokenIndex + "]"; // NOI18N + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/PartTypePropertyProvider.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/token/PartTypePropertyProvider.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,112 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. 
+ * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.token; + +import java.util.EnumSet; +import org.netbeans.api.lexer.PartType; +import org.netbeans.api.lexer.Token; +import org.netbeans.api.lexer.TokenId; +import org.netbeans.spi.lexer.TokenPropertyProvider; + +/** + * Property provider that stores {@link org.netbeans.api.lexer.PartType} information. + * + * @author Miloslav Metelka + * @version 1.00 + */ + +public final class PartTypePropertyProvider implements TokenPropertyProvider { + + private static final PartTypePropertyProvider[] partTypeOrdinal2Provider + = new PartTypePropertyProvider[PartType.class.getEnumConstants().length]; + + static { + for (PartType partType : EnumSet.allOf(PartType.class)) { + partTypeOrdinal2Provider[partType.ordinal()] = new PartTypePropertyProvider(partType); + } + } + + public static TokenPropertyProvider get(PartType partType) { + return (TokenPropertyProvider)partTypeOrdinal2Provider[partType.ordinal()]; + } + + public static TokenPropertyProvider createDelegating( + PartType partType, TokenPropertyProvider delegate + ) { + return new Delegating(partType, delegate); + } + + private PartType partType; + + public PartTypePropertyProvider(PartType partType) { + this.partType = partType; + } + + public Object getValue(Token token, Object key) { + if (key == PartType.class) { + return partType; + } + return null; + } + + private static final class Delegating implements TokenPropertyProvider { + + private final PartType partType; + + private final TokenPropertyProvider delegate; + + Delegating(PartType partType, TokenPropertyProvider delegate) { + assert (delegate != null) : "delegate expected to be non-null. Use PartTypePropertyProvider.get() instead."; // NOTICES + this.partType = partType; + this.delegate = delegate; + } + + public Object getValue(Token token, Object key) { + if (key == PartType.class) { + return partType; + } + return delegate.getValue(token, key); + } + + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/PropertyToken.java --- a/lexer/src/org/netbeans/lib/lexer/token/PropertyToken.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/token/PropertyToken.java Wed May 28 14:48:55 2008 +0200 @@ -42,36 +42,29 @@ package org.netbeans.lib.lexer.token; import org.netbeans.api.lexer.PartType; -import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.LexerUtilsConstants; import org.netbeans.spi.lexer.TokenPropertyProvider; /** - * Token that holds information about preprocessed characters. - * - *

    - * Instances of this token are more costly than other token types - * because in addition to regular information they store preprocessed - * text of the token. + * Token with associated properties. It may also act as a token part but without + * a reference to a complete token e.g. suitable for java's incomplete block comment. * * @author Miloslav Metelka * @version 1.00 */ -public final class PropertyToken extends DefaultToken { - - private final TokenPropertyProvider propertyProvider; // 28 bytes (24-super + 4) - - private final PartType partType; // 32 bytes - - public PropertyToken(T id, int length, - TokenPropertyProvider propertyProvider, PartType partType) { +public class PropertyToken extends DefaultToken { + + private final TokenPropertyProvider propertyProvider; // 28 bytes (24-super + 4) + + public PropertyToken(T id, int length, TokenPropertyProvider propertyProvider, PartType partType) { super(id, length); - this.propertyProvider = propertyProvider; - this.partType = partType; + assert (partType != null); + this.propertyProvider = (propertyProvider != null) + ? PartTypePropertyProvider.createDelegating(partType, propertyProvider) + : PartTypePropertyProvider.get(partType); } - + @Override public boolean hasProperties() { return (propertyProvider != null); @@ -81,15 +74,15 @@ public Object getProperty(Object key) { return (propertyProvider != null) ? propertyProvider.getValue(this, key) : null; } - + @Override public PartType partType() { - return partType; + return (PartType) getProperty(PartType.class); } @Override protected String dumpInfoTokenType() { - return "ProT"; // NOI18N "PrepToken" + return "ProT"; // NOI18N } - + } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/TextToken.java --- a/lexer/src/org/netbeans/lib/lexer/token/TextToken.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/lib/lexer/token/TextToken.java Wed May 28 14:48:55 2008 +0200 @@ -45,13 +45,11 @@ import org.netbeans.lib.lexer.TokenList; /** - * Token with an explicit text - either serving as a custom text token - * or a flyweight token. + * Token with an explicit text - either serving a flyweight token + * or a non-flyweight replacement for a flyweight token. *
- * The represented text can differ from the original content
+ * The represented text should be the same as the original content
  * of the recognized text input portion.
- *
    - * Token with the custom text cannot be branched by a language embedding. * *

    * The text token can act as a flyweight token by calling @@ -74,7 +72,7 @@ * is expected to correspond to the recognized input portion * (i.e. the text is not custom). *
    - * The token can be made flyweight by using setRawOffset(-1). + * The token can be made flyweight by using makeFlyweight(). * * @param id non-null identification of the token. * @param text non-null text of the token. @@ -92,7 +90,7 @@ } @Override - public final int length() { + public int length() { return text.length(); } @@ -102,7 +100,7 @@ } public final TextToken createCopy(TokenList tokenList, int rawOffset) { - return new TextToken(id(), tokenList, rawOffset, text()); + return new TextToken(id(), tokenList, rawOffset, text); } @Override @@ -110,6 +108,7 @@ return isFlyweight() ? "FlyT" : "TexT"; // NOI18N "TextToken" or "FlyToken" } + @Override public String toString() { return text.toString(); } diff -r 06a7890f802e lexer/src/org/netbeans/lib/lexer/token/TokenLength.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/src/org/netbeans/lib/lexer/token/TokenLength.java Wed May 28 14:48:55 2008 +0200 @@ -0,0 +1,155 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.token; + +import org.netbeans.lib.lexer.LexerUtilsConstants; + +/** + * Improves performance of doing Token.text().toString(). + * by using a cache factor which gets increased by every access to that method. + *
+ * Once a cache factor exceeds a threshold, the result of Token.text().toString() + * will be cached. +
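For illustration, a minimal sketch of how a token's toString() might drive this cache-factor scheme. The textOrLength field, the toString() override and the exact threshold comparison are assumptions of this sketch; only the TokenLength and LexerUtilsConstants names come from this patch.

    // Sketch only: the token keeps either a TokenLength or the cached String
    // in a single hypothetical field named textOrLength.
    @Override
    public String toString() {
        Object tol = textOrLength;
        if (tol instanceof String) { // result cached by a previous call
            return (String) tol;
        }
        TokenLength tokenLength = (tol != null) ? (TokenLength) tol : TokenLength.get(length());
        int nextCacheFactor = tokenLength.nextCacheFactor();
        if (nextCacheFactor > LexerUtilsConstants.CACHE_TOKEN_TO_STRING_THRESHOLD) {
            String textString = text().toString(); // expensive String creation
            textOrLength = textString; // cache it for subsequent calls
            return textString;
        }
        textOrLength = tokenLength.next(nextCacheFactor); // bump the cache factor
        return text().toString();
    }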
    + * TBD values of constants used by this class should be reviewed to match + * a real complexity of the particular operations. + * + * @author Miloslav Metelka + * @version 1.00 + */ + +public final class TokenLength implements CharSequence { + + private static final TokenLength[][] CACHE = new TokenLength[ + LexerUtilsConstants.MAX_CACHED_TOKEN_LENGTH + 1][]; + + + public static TokenLength get(int length) { + TokenLength tokenLength; + if (length <= LexerUtilsConstants.MAX_CACHED_TOKEN_LENGTH) { + synchronized (CACHE) { + TokenLength[] tokenLengths = CACHE[length]; + if (tokenLengths == null) { + tokenLengths = new TokenLength[1]; + CACHE[length] = tokenLengths; + } + tokenLength = tokenLengths[0]; + if (tokenLength == null) { + tokenLength = new TokenLength(length, + LexerUtilsConstants.CACHE_TOKEN_TO_STRING_THRESHOLD, (short)1); + tokenLengths[0] = tokenLength; + } + } + } else { // length too high - not cached + tokenLength = new TokenLength(length, + LexerUtilsConstants.CACHE_TOKEN_TO_STRING_THRESHOLD, (short)1); + } + return tokenLength; + } + + /** + * Length of a token. + */ + private final int length; // 12 bytes (8-super + 4) + + /** + * Cache factor of this item. + */ + private final short cacheFactor; // 14 bytes + + /** + * Index of a next item in array of token lengths with the same length in CACHE. + */ + private final short nextArrayIndex; // 16 bytes + + TokenLength(int length, short cacheFactor, short nextArrayIndex) { + this.length = length; + this.cacheFactor = cacheFactor; + this.nextArrayIndex = nextArrayIndex; + } + + public int length() { + return length; + } + + public short cacheFactor() { + return cacheFactor; + } + + public int nextCacheFactor() { + return cacheFactor + length + LexerUtilsConstants.TOKEN_LENGTH_STRING_CREATION_FACTOR; + } + + public TokenLength next(int nextCacheFactor) { + TokenLength tokenLength; + if (length <= LexerUtilsConstants.MAX_CACHED_TOKEN_LENGTH) { + synchronized (CACHE) { + TokenLength[] tokenLengths = CACHE[length]; + if (tokenLengths == null || tokenLengths.length <= nextArrayIndex) { + TokenLength[] tmp = new TokenLength[nextArrayIndex + 1]; + if (tokenLengths != null) { + System.arraycopy(tokenLengths, 0, tmp, 0, tokenLengths.length); + } + tokenLengths = tmp; + CACHE[length] = tokenLengths; + } + tokenLength = tokenLengths[nextArrayIndex]; + if (tokenLength == null) { + tokenLength = new TokenLength(length, (short)nextCacheFactor, (short)(nextArrayIndex + 1)); + tokenLengths[nextArrayIndex] = tokenLength; + } + } + } else { // length too high - not cached + tokenLength = new TokenLength(length, (short)nextCacheFactor, (short)(nextArrayIndex + 1)); + } + return tokenLength; + } + + public char charAt(int index) { + throw new IllegalStateException("Should never be called."); + } + + public CharSequence subSequence(int start, int end) { + throw new IllegalStateException("Should never be called."); + } + +} diff -r 06a7890f802e lexer/src/org/netbeans/spi/lexer/LanguageEmbedding.java --- a/lexer/src/org/netbeans/spi/lexer/LanguageEmbedding.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/spi/lexer/LanguageEmbedding.java Wed May 28 14:48:55 2008 +0200 @@ -205,9 +205,10 @@ return joinSections; } + @Override public String toString() { return "language: " + language() + ", skip[" + startSkipLength() // NOI18N - + ", " + endSkipLength + "]"; // NOI18N + + ", " + endSkipLength + "];" + (joinSections ? 
"join" : "no-join"); // NOI18N } } diff -r 06a7890f802e lexer/src/org/netbeans/spi/lexer/LanguageHierarchy.java --- a/lexer/src/org/netbeans/spi/lexer/LanguageHierarchy.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/spi/lexer/LanguageHierarchy.java Wed May 28 14:48:55 2008 +0200 @@ -48,7 +48,6 @@ import org.netbeans.api.lexer.LanguagePath; import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.lexer.CharProvider; import org.netbeans.lib.lexer.LexerApiPackageAccessor; import org.netbeans.lib.lexer.LexerInputOperation; import org.netbeans.lib.lexer.LexerSpiPackageAccessor; @@ -279,6 +278,9 @@ } /** + * This feature is currently not supported - Token.text() + * will return null for non-flyweight tokens. + *
    * Determine whether the text of the token with the particular id should * be retained after the token has been removed from the token list * because of the underlying mutable input source modification. @@ -387,8 +389,8 @@ return languageHierarchy.isRetainTokenText(id); } - public LexerInput createLexerInput(CharProvider charProvider) { - return new LexerInput(charProvider); + public LexerInput createLexerInput(LexerInputOperation operation) { + return new LexerInput(operation); } public Language language(MutableTextInput mti) { diff -r 06a7890f802e lexer/src/org/netbeans/spi/lexer/LexerInput.java --- a/lexer/src/org/netbeans/spi/lexer/LexerInput.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/spi/lexer/LexerInput.java Wed May 28 14:48:55 2008 +0200 @@ -42,7 +42,7 @@ package org.netbeans.spi.lexer; import org.netbeans.lib.editor.util.AbstractCharSequence; -import org.netbeans.lib.lexer.CharProvider; +import org.netbeans.lib.lexer.LexerInputOperation; import org.netbeans.lib.lexer.LexerUtilsConstants; /** @@ -81,10 +81,9 @@ public static final int EOF = -1; /** - * Character provider to which this lexer input delegates - * its operation. + * LexerInputOperation on which this lexer input delegates. */ - private CharProvider charProvider; + private LexerInputOperation operation; /** * Character sequence that corresponds @@ -101,10 +100,10 @@ /** * Construct instance of the lexer input. * - * @param charProvider non-null character provider for this lexer input. + * @param operation non-null character provider for this lexer input. */ - LexerInput(CharProvider charProvider) { - this.charProvider = charProvider; + LexerInput(LexerInputOperation operation) { + this.operation = operation; } /** @@ -116,7 +115,7 @@ * - all of them will return EOF. */ public int read() { - int c = charProvider.read(); + int c = operation.read(); if (c == EOF) { eof = 1; } @@ -158,7 +157,7 @@ eof = 0; // backup EOF count--; } - charProvider.backup(count); + operation.backup(count); } /** @@ -178,7 +177,7 @@ * If {@link LexerInput#EOF} was read then it is not counted into read length. */ public int readLength() { - return charProvider.readIndex(); + return operation.readLength(); } /** @@ -332,7 +331,7 @@ if (index < 0 || index >= length) { throw new IndexOutOfBoundsException("index=" + index + ", length=" + length); // NOI18N } - return charProvider.readExisting(index); + return operation.readExistingAtIndex(index); } } diff -r 06a7890f802e lexer/src/org/netbeans/spi/lexer/TokenFactory.java --- a/lexer/src/org/netbeans/spi/lexer/TokenFactory.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/spi/lexer/TokenFactory.java Wed May 28 14:48:55 2008 +0200 @@ -41,20 +41,11 @@ package org.netbeans.spi.lexer; -import java.util.Set; import org.netbeans.api.lexer.PartType; import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenId; -import org.netbeans.lib.editor.util.CharSequenceUtilities; -import org.netbeans.lib.lexer.LanguageOperation; import org.netbeans.lib.lexer.LexerInputOperation; -import org.netbeans.lib.lexer.TokenIdImpl; -import org.netbeans.lib.lexer.token.CustomTextToken; -import org.netbeans.lib.lexer.token.DefaultToken; -import org.netbeans.lib.lexer.token.ComplexToken; -import org.netbeans.lib.lexer.token.ComplexToken; -import org.netbeans.lib.lexer.token.PropertyToken; -import org.netbeans.lib.lexer.token.TextToken; +import org.netbeans.lib.lexer.LexerUtilsConstants; /** * Lexer should delegate all the token instances creation to this class. 
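As a usage illustration of the delegation described above, a lexer's nextToken() typically combines LexerInput and TokenFactory as follows. MyTokenId and the input/tokenFactory fields are invented for this sketch; the read(), backup(), readLength() and createToken() calls are the ones documented in this patch.

    // Hypothetical Lexer<MyTokenId>.nextToken() body.
    public Token<MyTokenId> nextToken() {
        int c = input.read();
        if (c == LexerInput.EOF) {
            return null; // input exhausted; no more tokens
        }
        if (Character.isLetter(c)) {
            do {
                c = input.read();
            } while (Character.isLetter(c));
            if (c != LexerInput.EOF) {
                input.backup(1); // return the extra non-letter character
            }
            return tokenFactory.createToken(MyTokenId.IDENTIFIER, input.readLength());
        }
        return tokenFactory.createToken(MyTokenId.OTHER, input.readLength());
    }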
@@ -67,20 +58,17 @@ public final class TokenFactory { - /** Flag for additional correctness checks (may degrade performance). */ - private static final boolean testing = Boolean.getBoolean("netbeans.debug.lexer.test"); - /** - * Token instance that should be returned by the lexer + * Token instance that the token creation methods in this class produce * if there is an active filtering of certain token ids * and the just recognized token-id should be skipped. + * Normally lexers do not need to check for this except some specific cases + * in which the {@link #isSkipToken(Token)} is a better typed alternative + * to this field. + * + * @deprecated Use {@link #isSkipToken(Token)} instead. */ - public static final Token SKIP_TOKEN - = new TextToken( - new TokenIdImpl("skip-token-id; special id of TokenFactory.SKIP_TOKEN; " + // NOI18N - " It should never be part of token sequence", 0, null), // NOI18N - "" // empty skip token text NOI18N - ); + public static final Token SKIP_TOKEN = LexerUtilsConstants.SKIP_TOKEN; private final LexerInputOperation operation; @@ -95,7 +83,7 @@ * @see #createToken(TokenId, int) */ public Token createToken(T id) { - return createToken(id, operation.readIndex()); + return createToken(id, operation.readLength()); } /** @@ -111,21 +99,14 @@ * because of token id filter. */ public Token createToken(T id, int length) { - if (isSkipToken(id)) { - operation.tokenRecognized(length, true); - return skipToken(); - } else { // Do not skip the token - if (operation.tokenRecognized(length, false)) { // Create preprocessed token -// return new PreprocessedTextToken(id, operation.tokenLength()); - return new DefaultToken(id, operation.tokenLength()); - } else { - return new DefaultToken(id, operation.tokenLength()); - } - } + return operation.createToken(id, length); } /** * Create regular token instance with an explicit length and part type. + *
    + * This is suitable e.g. for unfinished block comment when a COMMENT token + * and PartType.START arguments would be used. * * @param id non-null token id recognized by the lexer. * @param length >=0 length of the token to be created. The length must not @@ -138,21 +119,7 @@ * because of token id filter. */ public Token createToken(T id, int length, PartType partType) { - checkPartTypeNonNull(partType); - if (partType == PartType.COMPLETE) - return createToken(id, length); - - if (isSkipToken(id)) { - operation.tokenRecognized(length, true); - return skipToken(); - } else { // Do not skip the token - if (operation.tokenRecognized(length, false)) { // Create preprocessed token -// return new ComplexToken(id, operation.tokenLength(), null, partType, null); - return new PropertyToken(id, operation.tokenLength(), null, partType); - } else { - return new PropertyToken(id, operation.tokenLength(), null, partType); - } - } + return operation.createToken(id, length, partType); } /** @@ -174,48 +141,16 @@ * because of token id filter. */ public Token getFlyweightToken(T id, String text) { - assert (text.length() <= operation.readIndex()); - // Compare each recognized char with the corresponding char in text - if (testing) { - for (int i = 0; i < text.length(); i++) { - if (text.charAt(i) != operation.readExisting(i)) { - throw new IllegalArgumentException("Flyweight text in " + // NOI18N - "TokenFactory.getFlyweightToken(" + id + ", \"" + // NOI18N - CharSequenceUtilities.debugText(text) + "\") " + // NOI18N - "differs from recognized text: '" + // NOI18N - CharSequenceUtilities.debugChar(operation.readExisting(i)) + - "' != '" + CharSequenceUtilities.debugChar(text.charAt(i)) + // NOI18N - "' at index=" + i // NOI18N - ); - } - } - } - - // Check whether token with given id should be created - if (isSkipToken(id)) { - operation.tokenRecognized(text.length(), true); - return skipToken(); - } else { // Do not skip the token - if (operation.tokenRecognized(text.length(), false)) { // Create preprocessed token -// return new PreprocessedTextToken(id, operation.tokenLength()); - return new DefaultToken(id, operation.tokenLength()); - } else if (operation.isFlyTokenAllowed()) { - LanguageOperation langOp = operation.languageOperation(); - return langOp.getFlyweightToken(id, text); - } else { // return non-flyweight token - return new DefaultToken(id, operation.tokenLength()); - } - } + return operation.getFlyweightToken(id, text); } /** - * Create token with properties. + * Create complete token with properties. * * @param id non-null token id. * @param length >=0 length of the token to be created. The length must not * exceed the number of characters read from the lexer input. - * @param propertyProvider non-null token property provider. - * @param partType whether this token is complete or just a part of complete token. + * @param propertyProvider token property provider or null if there are no extra properties. * See {@link TokenPropertyProvider} for examples how this parameter may be used. * @return non-null property token instance. *
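To make the unfinished-block-comment case mentioned above concrete, a hedged sketch of the end-of-input branch in a comment-scanning loop; MyTokenId.COMMENT and the surrounding lexer fields are invented, while the backup-at-EOF pattern mirrors the TokenDumpLexer change later in this patch.

    // While scanning "/* ..." the input may end before the closing "*/":
    if (c == LexerInput.EOF) {
        input.backup(1); // EOF itself is not part of the token
        // A comment was started but not finished - emit its start part.
        return tokenFactory.createToken(MyTokenId.COMMENT, input.readLength(), PartType.START);
    }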
    @@ -223,57 +158,90 @@ * if tokens for the given token id should be skipped * because of token id filter. */ - public Token createPropertyToken(T id, int length, - TokenPropertyProvider propertyProvider, PartType partType) { - checkPartTypeNonNull(partType); - if (isSkipToken(id)) { - operation.tokenRecognized(length, true); - return skipToken(); - } else { // Do not skip the token - if (operation.tokenRecognized(length, false)) { // Create preprocessed token -// return new ComplexToken(id, operation.tokenLength(), -// propertyProvider, null, partType); - return new PropertyToken(id, operation.tokenLength(), - propertyProvider, partType); - } else { - return new PropertyToken(id, operation.tokenLength(), - propertyProvider, partType); - } - } + public Token createPropertyToken(T id, int length, TokenPropertyProvider propertyProvider) { + return operation.createPropertyToken(id, length, propertyProvider, PartType.COMPLETE); } /** - * Create token with a custom text that possibly differs from the text - * represented by the token in the input text. + * Create token with properties. + * + * @param id non-null token id. + * @param length >=0 length of the token to be created. The length must not + * exceed the number of characters read from the lexer input. + * @param propertyProvider token property provider or null if there are no extra properties. + * See {@link TokenPropertyProvider} for examples how this parameter may be used. + * @param partType whether this token is complete or just a part of complete token. + * Null may be passed which implies {@link PartType#COMPLETE}. + * @return non-null property token instance. + *
    + * {@link #SKIP_TOKEN} will be returned + * if tokens for the given token id should be skipped + * because of token id filter. + */ + public Token createPropertyToken(T id, int length, + TokenPropertyProvider propertyProvider, PartType partType) { + return operation.createPropertyToken(id, length, propertyProvider, partType); + } + + /** + * Create token with a custom text that possibly differs in length and content + * from the text represented by the token in the input text. + *
+ * Note: This method should not be used. It is planned to be removed completely. + * Custom text tokens no longer + * save space by avoiding a reference to the original characters (e.g. when the input is read from a Reader). +
+ * Having the token's text always match the input's text is more systematic + * and simplifies the lexer module's design. +
+ * Therefore the only benefit of custom text tokens would be for certain tools, + * e.g. parsers, that require a different text than the one naturally present + * in the token. In such a case the token should have a property + * (the key can be e.g. CharSequence.class) that returns a char sequence + * with the desired text. If that text is a subsequence of the original token's text + * the token property provider can even be made flyweight:

+     * class StripFirstAndLastCharTokenPropertyProvider implements TokenPropertyProvider {
+     *     public static final TokenPropertyProvider INSTANCE = new StripFirstAndLastCharTokenPropertyProvider();
    +     *     public Object getValue(Token token, Object key) {
    +     *         if (key == CharSequence.class) {
    +     *             return token.text().subSequence(1, token.length() - 1);
    +     *         }
    +     *         return null;
    +     *     }
    +     * }
    +     * 
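On the consuming side, assuming the provider above is attached through createPropertyToken(...), a tool reads the alternate text via the token's property lookup instead of relying on a custom-text token; the null fallback here is illustrative:

    // Client-side sketch: Token.getProperty(Object) delegates to the
    // TokenPropertyProvider given to createPropertyToken(...).
    CharSequence text = (CharSequence) token.getProperty(CharSequence.class);
    if (text == null) {
        text = token.text(); // no alternate text; use the token's natural text
    }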
    + * + *

    + *

    + * + * @param id non-null token id of the token being created. + * @param text non-null custom text assigned to the token. + * @param length recognized characters corresponding to the token being created. + * @param partType should always be null otherwise this method would throw + * an exception. + * @deprecated This method is deprecated without replacement - see description + * how a similar effect can be obtained. */ public Token createCustomTextToken(T id, CharSequence text, int length, PartType partType) { - checkPartTypeNonNull(partType); - if (isSkipToken(id)) { - operation.tokenRecognized(length, true); - return skipToken(); - } else { // Do not skip the token - if (operation.tokenRecognized(length, false)) { // Create preprocessed token - return new CustomTextToken(id, operation.tokenLength(), text, partType); -// return new ComplexToken(id, operation.tokenLength(), null, text, partType); - } else { - return new CustomTextToken(id, operation.tokenLength(), text, partType); - } + if (partType != null) { + throw new IllegalArgumentException("This method is deprecated and it should" + + " only be used with partType==null (see its javadoc)."); } - } - - private boolean isSkipToken(T id) { - Set skipTokenIds = operation.skipTokenIds(); - return (skipTokenIds != null) && skipTokenIds.contains(id); + return operation.createCustomTextToken(id, length, text); } - @SuppressWarnings("unchecked") // NOI18N - private Token skipToken() { - return SKIP_TOKEN; + /** + * Check whether a token (produced by one of the token creation methods) + * is a special flyweight token used in cases + * when there is an active filtering of certain token ids (e.g. comments and whitespace) + * and the just recognized token-id should be skipped. + * + * @param token non-null token. + * @return true if the token is a skip-token. + */ + public boolean isSkipToken(Token token) { + return token == SKIP_TOKEN; } - - private void checkPartTypeNonNull(PartType partType) { - if (partType == null) - throw new IllegalArgumentException("partType must be non-null"); - } - + } diff -r 06a7890f802e lexer/src/org/netbeans/spi/lexer/TokenPropertyProvider.java --- a/lexer/src/org/netbeans/spi/lexer/TokenPropertyProvider.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/src/org/netbeans/spi/lexer/TokenPropertyProvider.java Wed May 28 14:48:55 2008 +0200 @@ -83,7 +83,7 @@ * this.value = value; * } * - * public Object getValue(Token token, Object key) { + * public Object getValue(Token<T> token, Object key) { * if ("key".equals(key)) { * return value; * } @@ -109,6 +109,6 @@ * @param key non-null key for which the value should be retrieved. * @return value of the property or null if there is no value for the given key. 
*/ - Object getValue(Token token, Object key); + Object getValue(Token token, Object key); } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/api/lexer/CustomTokenClassTest.java --- a/lexer/test/unit/src/org/netbeans/api/lexer/CustomTokenClassTest.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/api/lexer/CustomTokenClassTest.java Wed May 28 14:48:55 2008 +0200 @@ -41,6 +41,7 @@ package org.netbeans.api.lexer; +import java.util.List; import org.netbeans.junit.NbTestCase; /** @@ -99,6 +100,18 @@ public PartType partType() { return null; } + + public boolean isRemoved() { + return false; + } + + public Token joinToken() { + return null; + } + + public List> joinedParts() { + return null; + } } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/api/lexer/TokenSequenceTest.java --- a/lexer/test/unit/src/org/netbeans/api/lexer/TokenSequenceTest.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/api/lexer/TokenSequenceTest.java Wed May 28 14:48:55 2008 +0200 @@ -53,9 +53,9 @@ import org.netbeans.lib.lexer.TokenList; import org.netbeans.lib.lexer.test.LexerTestUtilities; import org.netbeans.lib.lexer.test.ModificationTextDocument; -import org.netbeans.lib.lexer.test.simple.*; import org.netbeans.lib.lexer.token.DefaultToken; import org.netbeans.lib.lexer.token.TextToken; +import org.netbeans.lib.lexer.token.TokenLength; /** * Test methods of token sequence. @@ -403,7 +403,7 @@ } public void testTokenSize() { - String text = "abc+"; + String text = "abc+def"; TokenHierarchy hi = TokenHierarchy.create(text,TestTokenId.language()); TokenSequence ts = hi.tokenSequence(); @@ -411,18 +411,39 @@ LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "abc", 0); assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.PLUS, "+", 3); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "def", 4); assertFalse(ts.moveNext()); TokenList tokenList = LexerTestUtilities.tokenList(ts); ts.moveIndex(0); // move before "abc" assertTrue(ts.moveNext()); // Test DefaultToken size - assertSame(DefaultToken.class, ts.token().getClass()); - assertSize("Token instance too big", Collections.singletonList(ts.token()), 24,new Object[] { tokenList,TestTokenId.IDENTIFIER}); + Token token = ts.token(); + // Exclude TokenLength since it should be cached - verify later + TokenLength cachedTokenLength = TokenLength.get(token.length()); + assertSame(DefaultToken.class, token.getClass()); + assertSize("Token instance too big", Collections.singletonList(token), 24, + new Object[] { tokenList, TestTokenId.IDENTIFIER, cachedTokenLength }); + + // Check that TokenLength is cached for small tokens + assertSame("TokenLength instances not cached for small tokens", + cachedTokenLength, TokenLength.get(token.length())); + // Test TextToken size assertTrue(ts.moveNext()); - assertSame(TextToken.class, ts.token().getClass()); - assertSize("Token instance too big", Collections.singletonList(ts.token()), 24,new Object[] { tokenList,TestTokenId.PLUS, "+"}); + token = ts.token(); + assertSame(TextToken.class, token.getClass()); + assertSize("Token instance too big", Collections.singletonList(token), 24, + new Object[] { tokenList, TestTokenId.PLUS, "+" }); + + // Test DefaultToken size + assertTrue(ts.moveNext()); + token = ts.token(); + assertSame(DefaultToken.class, token.getClass()); + // Verify that the TokenLength is cached for small tokens - use tokenLength3 directly + assertSize("Token instance too big", 
Collections.singletonList(token), 24, + new Object[] { tokenList, TestTokenId.IDENTIFIER, cachedTokenLength }); } public void testSubSequenceInUnfinishedTH() throws Exception { diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/JoinSectionsTest.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/JoinSectionsTest.java Wed May 28 13:50:31 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,226 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. - * - * The contents of this file are subject to the terms of either the GNU - * General Public License Version 2 only ("GPL") or the Common - * Development and Distribution License("CDDL") (collectively, the - * "License"). You may not use this file except in compliance with the - * License. You can obtain a copy of the License at - * http://www.netbeans.org/cddl-gplv2.html - * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the - * specific language governing permissions and limitations under the - * License. When distributing the software, include this License Header - * Notice in each file and include the License file at - * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the GPL Version 2 section of the License file that - * accompanied this code. If applicable, add the following below the - * License Header, with the fields enclosed by brackets [] replaced by - * your own identifying information: - * "Portions Copyrighted [year] [name of copyright owner]" - * - * Contributor(s): - * - * The Original Software is NetBeans. The Initial Developer of the Original - * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun - * Microsystems, Inc. All Rights Reserved. - * - * If you wish your version of this file to be governed by only the CDDL - * or only the GPL Version 2, indicate your decision by adding - * "[Contributor] elects to include this software in this distribution - * under the [CDDL or GPL Version 2] license." If you do not indicate a - * single choice of license, a recipient has the option to distribute - * your version of this file under either the CDDL, the GPL Version 2 or - * to extend the choice of license to its licensees as provided above. - * However, if you add GPL Version 2 code and therefore, elected the GPL - * Version 2 license, then the option applies only if the new code is - * made subject to such option by the copyright holder. - */ -package org.netbeans.lib.lexer; - -import java.util.List; -import org.netbeans.api.lexer.Language; -import org.netbeans.api.lexer.LanguagePath; -import org.netbeans.api.lexer.PartType; -import org.netbeans.api.lexer.Token; -import org.netbeans.api.lexer.TokenHierarchy; -import org.netbeans.api.lexer.TokenSequence; -import org.netbeans.junit.NbTestCase; -import org.netbeans.lib.lexer.lang.TestJoinSectionsTextTokenId; -import org.netbeans.lib.lexer.test.LexerTestUtilities; -import org.netbeans.lib.lexer.lang.TestJoinSectionsTopTokenId; -import org.netbeans.lib.lexer.test.ModificationTextDocument; - -/** - * Test embedded sections that should be lexed together. 
- * - * @author Miloslav Metelka - */ -public class JoinSectionsTest extends NbTestCase { - - public JoinSectionsTest(String testName) { - super(testName); - } - - protected void setUp() throws Exception { - } - - public void testJoinSections() throws Exception { - // Turn on detailed checking -// Logger.getLogger(TokenHierarchyOperation.class.getName()).setLevel(Level.FINEST); - - // 000000000011111111112222222222 - // 012345678901234567890123456789 - String text = "a{be}fi{jm}n"; - ModificationTextDocument doc = new ModificationTextDocument(); - doc.insertString(0, text, null); - doc.putProperty(Language.class,TestJoinSectionsTopTokenId.language()); - - TokenHierarchy hi = TokenHierarchy.get(doc); - TokenSequence ts = hi.tokenSequence(); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "a{b", -1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", -1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "e}f", -1); - - // Get embedded tokens within TEXT tokens. There should be "a" then BRACES start "{b" then BRACES end "e}|" then "f" - LanguagePath innerLP = LanguagePath.get(TestJoinSectionsTopTokenId.language()). - embedded(TestJoinSectionsTextTokenId.language()); - List> tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE); - checkInitialTokens(tsList); - - - // Use iterator for fetching token sequences - int i = 0; - for (TokenSequence ts2 : tsList) { - assertSame(ts2, tsList.get(i++)); - } - - LexerTestUtilities.assertConsistency(hi); - - // Check tokenSequenceList() with explicit offsets - // Check correct TSs bounds - tsList = hi.tokenSequenceList(innerLP, 0, 7); - assertEquals(1, tsList.size()); - tsList = hi.tokenSequenceList(innerLP, 0, 8); - assertEquals(2, tsList.size()); - - - // Do modifications - // Remove second closing brace '}' - doc.remove(8, 1); - LexerTestUtilities.assertConsistency(hi); - // 000000000011111111112222222222 - // 012345678901234567890123456789 - // before: "a{be}fi{jm}n"; - // after: "a{befi{jm}n"; - tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE); - assertEquals(4, tsList.size()); // 2 sections - - // 1.section "a{b" - ts = tsList.get(0); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "a", -1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "{b", -1); - Token token = ts.token(); - assertEquals(PartType.START, token.partType()); - assertFalse(ts.moveNext()); - - // 2.section "ef" - ts = tsList.get(1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "ef", -1); - token = ts.token(); - assertEquals(PartType.MIDDLE, token.partType()); - assertFalse(ts.moveNext()); - - // 3.section "i{j" - ts = tsList.get(2); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "i{j", -1); - token = ts.token(); - assertEquals(PartType.MIDDLE, token.partType()); - assertFalse(ts.moveNext()); - - // 4.section "m}n" - ts = tsList.get(3); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "m}", -1); - token = ts.token(); - assertEquals(PartType.END, token.partType()); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "n", -1); - assertFalse(ts.moveNext()); - - - 
// Re-add second closing brace '}' - doc.insertString(8, "}", null); - LexerTestUtilities.assertConsistency(hi); - // 000000000011111111112222222222 - // 012345678901234567890123456789 - // before: "a{befi{jm}n"; - // after: "a{be}fi{jm}n"; - tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE); - checkInitialTokens(tsList); - - doc.remove(0, doc.getLength()); - LexerTestUtilities.assertConsistency(hi); - ts = hi.tokenSequence(); - assertFalse(ts.moveNext()); - doc.insertString(0, text, null); - - } - - private void checkInitialTokens(List> tsList) { - // 000000000011111111112222222222 - // 012345678901234567890123456789 - // text: "a{be}fi{jm}n"; - assertEquals(4, tsList.size()); // 4 sections - - // 1.section - TokenSequence ts = tsList.get(0); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "a", -1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "{b", -1); - Token token = ts.token(); - assertEquals(PartType.START, token.partType()); - assertFalse(ts.moveNext()); - - // 2.section - ts = tsList.get(1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "e}", -1); - token = ts.token(); - assertEquals(PartType.END, token.partType()); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "f", -1); - assertFalse(ts.moveNext()); - - // 3.section - ts = tsList.get(2); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "i", -1); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "{j", -1); - token = ts.token(); - assertEquals(PartType.START, token.partType()); - assertFalse(ts.moveNext()); - - // 4.section - ts = tsList.get(3); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "m}", -1); - token = ts.token(); - assertEquals(PartType.END, token.partType()); - assertTrue(ts.moveNext()); - LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "n", -1); - assertFalse(ts.moveNext()); - } - -} diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJavadocLexer.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJavadocLexer.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJavadocLexer.java Wed May 28 14:48:56 2008 +0200 @@ -41,7 +41,6 @@ package org.netbeans.lib.lexer.lang; -import org.netbeans.lib.lexer.lang.TestJavadocTokenId; import org.netbeans.api.lexer.Token; import org.netbeans.spi.lexer.Lexer; import org.netbeans.spi.lexer.LexerInput; diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTextLexer.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTextLexer.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTextLexer.java Wed May 28 14:48:56 2008 +0200 @@ -42,7 +42,6 @@ package org.netbeans.lib.lexer.lang; import org.netbeans.api.lexer.PartType; -import org.netbeans.lib.lexer.lang.TestJoinSectionsTextTokenId; import org.netbeans.api.lexer.Token; import org.netbeans.spi.lexer.Lexer; import org.netbeans.spi.lexer.LexerInput; diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTextTokenId.java --- 
a/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTextTokenId.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTextTokenId.java Wed May 28 14:48:56 2008 +0200 @@ -60,8 +60,8 @@ */ public enum TestJoinSectionsTextTokenId implements TokenId { - TEXT(), // Text except of text within braces - BRACES(); // "{ ... }" i.e. text within braces + BRACES(), // "{...}" i.e. text within braces + TEXT(); // Text except of text within braces private TestJoinSectionsTextTokenId() { } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTopTokenId.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTopTokenId.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/lang/TestJoinSectionsTopTokenId.java Wed May 28 14:48:56 2008 +0200 @@ -60,8 +60,8 @@ */ public enum TestJoinSectionsTopTokenId implements TokenId { - TEXT(), - TAG(); + TAG(), // Text enclosed in <..> including '<' and '>' + TEXT(); // Any text not enclosed in <...> private TestJoinSectionsTopTokenId() { } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/LexerTestUtilities.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/LexerTestUtilities.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/LexerTestUtilities.java Wed May 28 14:48:56 2008 +0200 @@ -182,16 +182,6 @@ } /** - * @see #assertTokenSequencesEqual(String,TokenSequence,TokenHierarchy,TokenSequence,TokenHierarchy,boolean) - */ - public static void assertTokenSequencesEqual( - TokenSequence expected, TokenHierarchy expectedHi, - TokenSequence actual, TokenHierarchy actualHi, - boolean testLookaheadAndState) { - assertTokenSequencesEqual(null, expected, expectedHi, actual, actualHi, testLookaheadAndState); - } - - /** * Compare contents of the given token sequences by moving through all their * tokens. *
    @@ -210,23 +200,17 @@ public static void assertTokenSequencesEqual(String message, TokenSequence expected, TokenHierarchy expectedHi, TokenSequence actual, TokenHierarchy actualHi, - boolean testLookaheadAndState) { - boolean success = false; - try { - String prefix = messagePrefix(message); - TestCase.assertEquals(prefix + "Move previous: ", expected.movePrevious(), actual.movePrevious()); - while (expected.moveNext()) { - TestCase.assertTrue(prefix + "Move next: ", actual.moveNext()); - assertTokensEqual(message, expected, expectedHi, actual, actualHi, testLookaheadAndState); - } - TestCase.assertFalse(prefix + "Move next not disabled", actual.moveNext()); - success = true; - } finally { - if (!success) { - System.err.println("Expected token sequence dump:\n" + expected); - System.err.println("Test token sequence dump:\n" + actual); - } + boolean testLookaheadAndState, boolean dumpWholeHi) { + String prefix = messagePrefix(message); + TestCase.assertEquals(prefix + "Move previous: ", expected.movePrevious(), actual.movePrevious()); + int i = 0; + while (expected.moveNext()) { + String prefixI = prefix + "->[" + i + "]"; + TestCase.assertTrue(prefixI + ": Cannot moveNext() in test token sequence", actual.moveNext()); + assertTokensEqual(prefixI, expected, expectedHi, actual, actualHi, testLookaheadAndState); + i++; } + TestCase.assertFalse(prefix + "moveNext() possible at end of test token sequence", actual.moveNext()); } private static void assertTokensEqual(String message, @@ -339,7 +323,9 @@ } public static void incCheck(Document doc, boolean nested) { - TokenHierarchy thInc = TokenHierarchy.get(doc); + TokenHierarchy incHi = TokenHierarchy.get(doc); + assertConsistency(incHi); + Language language = (Language) doc.getProperty(Language.class); String docText = null; @@ -349,35 +335,69 @@ e.printStackTrace(); TestCase.fail("BadLocationException occurred"); } - TokenHierarchy thBatch = TokenHierarchy.create(docText, language); - boolean success = false; - TokenSequence batchTS = thBatch.tokenSequence(); + TokenHierarchy batchHi = TokenHierarchy.create(docText, language); + TokenSequence batchTS = batchHi.tokenSequence(); + TokenSequence incTS = incHi.tokenSequence(); try { // Compare lookaheads and states as well - assertTokenSequencesEqual(batchTS, thBatch, - thInc.tokenSequence(), thInc, true); - success = true; - } finally { - if (!success) { - // Go forward two tokens to have an extra tokens context - batchTS.moveNext(); - batchTS.moveNext(); - System.err.println("BATCH token sequence dump:\n" + thBatch.tokenSequence()); - TokenHierarchy lastHi = (TokenHierarchy)doc.getProperty(LAST_TOKEN_HIERARCHY); - if (lastHi != null) { - System.err.println("PREVIOUS batch token sequence dump:\n" + lastHi.tokenSequence()); - } + assertTokenSequencesEqual("TOP", batchTS, batchHi, incTS, incHi, true, false); + } catch (Throwable t) { + // Go forward two tokens to have an extra tokens context + batchTS.moveNext(); + batchTS.moveNext(); + StringBuilder sb = new StringBuilder(512); + sb.append("BATCH token sequence dump:\n").append(batchTS); + sb.append("\n\nTEST token sequence dump:\n").append(incTS); + TokenHierarchy lastHi = (TokenHierarchy)doc.getProperty(LAST_TOKEN_HIERARCHY); + if (lastHi != null) { +// System.err.println("PREVIOUS batch token sequence dump:\n" + lastHi.tokenSequence()); + } + throw new IllegalStateException(sb.toString(), t); + } + + if (nested) { + batchTS.moveStart(); + incTS.moveStart(); + try { + incCheckNested("TOP", doc, batchTS, batchHi, incTS, incHi); + } catch 
(Throwable t) { // Re-throw with hierarchy info + StringBuilder sb = new StringBuilder(512); + sb.append("BATCH token hierarchy:\n").append(batchHi); + sb.append("\n\n\nTEST token hierarchy:\n").append(incHi); + throw new IllegalStateException(sb.toString(), t); } } - + // Check the change since last modification TokenHierarchy lastHi = (TokenHierarchy)doc.getProperty(LAST_TOKEN_HIERARCHY); if (lastHi != null) { // TODO comparison } - doc.putProperty(LAST_TOKEN_HIERARCHY, thBatch); // new last batch token hierarchy + doc.putProperty(LAST_TOKEN_HIERARCHY, batchHi); // new last batch token hierarchy } - + + public static void incCheckNested(String message, Document doc, + TokenSequence batch, TokenHierarchy batchTH, + TokenSequence inc, TokenHierarchy incTH + ) { + int i = 0; + while (inc.moveNext()) { + TestCase.assertTrue("No more tokens in batch token sequence", batch.moveNext()); + TokenSequence batchE = batch.embedded(); + TokenSequence incE = inc.embedded(); + String messageE = message + "->[" + i + "]"; + if (incE != null) { + TestCase.assertNotNull("Inc embedded sequence is null", batchE); + assertTokenSequencesEqual(messageE, batchE, batchTH, incE, incTH, true, true); + + incCheckNested(messageE, doc, batchE, batchTH, incE, incTH); + } else { // Inc embedded is null + TestCase.assertNull("Batch embedded sequence non-null", batchE); + } + i++; + } + } + /** * Get lookahead for the token to which the token sequence is positioned. *
    @@ -387,7 +407,7 @@ return tokenList(ts).lookahead(ts.index()); } - /** + /** * Get state for the token to which the token sequence is positioned. *
    * The method uses reflection to get reference to tokenList field in token sequence. diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/TestRandomModify.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/TestRandomModify.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/TestRandomModify.java Wed May 28 14:48:56 2008 +0200 @@ -47,7 +47,6 @@ import javax.swing.text.Document; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.TokenHierarchy; -import org.netbeans.api.lexer.TokenId; import org.netbeans.lib.editor.util.CharSequenceUtilities; @@ -58,6 +57,8 @@ * @author mmetelka */ public class TestRandomModify { + + private final Class hostClass; private boolean debugOperation; @@ -77,18 +78,19 @@ private List snapshots = new ArrayList(); - public TestRandomModify() { - this(0); + public TestRandomModify(Class hostClass) { + this(0, hostClass); } - public TestRandomModify(long seed) { + public TestRandomModify(long seed, Class hostClass) { + this.hostClass = hostClass; this.doc = new javax.swing.text.PlainDocument(); this.random = new Random(); if (seed == 0) { // Use currentTimeMillis() (btw nanoTime() in 1.5 instead) seed = System.currentTimeMillis(); } - System.err.println("TestRandomModify with SEED=" + seed + "L"); + System.err.println(hostClass.getName() + " with SEED=" + seed + "L"); random.setSeed(seed); } @@ -192,9 +194,15 @@ public void insertText(int offset, String text) throws Exception { if (text.length() > 0) { if (isDebugOperation()) { + int beforeTextStartOffset = Math.max(offset - 5, 0); + String beforeText = document().getText(beforeTextStartOffset, offset - beforeTextStartOffset); + int afterTextEndOffset = Math.min(offset + 5, document().getLength()); + String afterText = doc.getText(offset, afterTextEndOffset - offset); System.err.println(opIdString() + " INSERT(" + offset + - ", " + text.length() +"): \"" - + CharSequenceUtilities.debugText(text) +"\"" + ", " + text.length() +"): \"" + + CharSequenceUtilities.debugText(text) +"\" text-around: \"" + + CharSequenceUtilities.debugText(beforeText) + '|' + + CharSequenceUtilities.debugText(afterText) + "\"" ); if (isDebugDocumentText()) { StringBuilder sb = new StringBuilder(); @@ -328,6 +336,8 @@ public void clearDocument() throws Exception { doc.remove(0, doc.getLength()); + // Verify that there are no tokens + LexerTestUtilities.incCheck(doc, false); } public final Language language() { @@ -361,8 +371,8 @@ System.err.println("Comparing snapshot " + i + " of " + snapshots.size()); } // Check snapshot without comparing lookaheads and states - LexerTestUtilities.assertTokenSequencesEqual(bm.tokenSequence(), bm, - s.tokenSequence(), s, false); + LexerTestUtilities.assertTokenSequencesEqual(null, bm.tokenSequence(), bm, + s.tokenSequence(), s, false, false); } } } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/dump/TokenDumpCheck.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/dump/TokenDumpCheck.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/dump/TokenDumpCheck.java Wed May 28 14:48:56 2008 +0200 @@ -58,7 +58,7 @@ import org.netbeans.api.lexer.TokenUtilities; import org.netbeans.junit.NbTestCase; import org.netbeans.lib.editor.util.CharSequenceUtilities; -import org.netbeans.lib.lexer.batch.BatchTokenList; +import org.netbeans.lib.lexer.BatchTokenList; import org.netbeans.lib.lexer.test.LexerTestUtilities; /** diff -r 06a7890f802e 
lexer/test/unit/src/org/netbeans/lib/lexer/test/dump/TokenDumpLexer.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/dump/TokenDumpLexer.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/dump/TokenDumpLexer.java Wed May 28 14:48:56 2008 +0200 @@ -43,6 +43,7 @@ import org.netbeans.api.lexer.PartType; import org.netbeans.api.lexer.Token; +import org.netbeans.api.lexer.TokenId; import org.netbeans.spi.lexer.Lexer; import org.netbeans.spi.lexer.LexerInput; import org.netbeans.spi.lexer.LexerRestartInfo; @@ -205,7 +206,7 @@ case EOF: input.backup(1); return tokenFactory.createPropertyToken(id, input.readLength(), - new UnicodeCharValueProvider(new Character(ch)), PartType.COMPLETE); + new UnicodeCharValueProvider(new Character(ch)), PartType.COMPLETE); } } input.backup(1); @@ -219,20 +220,20 @@ public void release() { } - private static final class UnicodeCharValueProvider implements TokenPropertyProvider { - + private static final class UnicodeCharValueProvider implements TokenPropertyProvider { + private Character ch; - + UnicodeCharValueProvider(Character ch) { this.ch = ch; } - - public Object getValue(Token token, Object key) { + + public Object getValue(Token token, Object key) { if (TokenDumpTokenId.UNICODE_CHAR_TOKEN_PROPERTY.equals(key)) return ch; return null; // no non-tokenStore value } - + } - + } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/inc/TokenListUpdaterTest.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/inc/TokenListUpdaterTest.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/inc/TokenListUpdaterTest.java Wed May 28 14:48:56 2008 +0200 @@ -41,13 +41,16 @@ package org.netbeans.lib.lexer.test.inc; +import java.io.PrintStream; import java.util.ConcurrentModificationException; +import java.util.logging.Level; +import java.util.logging.Logger; import javax.swing.text.Document; import junit.framework.TestCase; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.TokenHierarchy; -import org.netbeans.api.lexer.TokenId; import org.netbeans.api.lexer.TokenSequence; +import org.netbeans.junit.NbTestCase; import org.netbeans.lib.lexer.test.LexerTestUtilities; import org.netbeans.lib.lexer.test.ModificationTextDocument; import org.netbeans.lib.lexer.lang.TestTokenId; @@ -57,7 +60,7 @@ * * @author mmetelka */ -public class TokenListUpdaterTest extends TestCase { +public class TokenListUpdaterTest extends NbTestCase { public TokenListUpdaterTest(String testName) { super(testName); @@ -67,6 +70,18 @@ } protected void tearDown() throws java.lang.Exception { + } + + @Override + public PrintStream getLog() { + return System.out; +// return super.getLog(); + } + + @Override + protected Level logLevel() { + return Level.INFO; +// return super.logLevel();; } public void testInsertUnfinishedLexing() throws Exception { @@ -176,7 +191,9 @@ LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "a", 0); // Remove "b" +// Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.FINE); // Extra logging doc.remove(2, 1); +// Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.WARNING); // End of extra logging try { ts.moveNext(); fail("Should not get there"); @@ -186,6 +203,7 @@ ts = hi.tokenSequence(); assertTrue(ts.moveNext()); + CharSequence tokenText = ts.token().text(); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "a", 0); assertTrue(ts.moveNext()); 
LexerTestUtilities.assertTokenEquals(ts,TestTokenId.PLUS, "+", 1); diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/join/JoinSectionsMod1Test.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/join/JoinSectionsMod1Test.java Wed May 28 14:48:56 2008 +0200 @@ -0,0 +1,293 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ +package org.netbeans.lib.lexer.test.join; + +import java.io.PrintStream; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.netbeans.api.lexer.Language; +import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.PartType; +import org.netbeans.api.lexer.Token; +import org.netbeans.api.lexer.TokenHierarchy; +import org.netbeans.api.lexer.TokenSequence; +import org.netbeans.junit.NbTestCase; +import org.netbeans.lib.lexer.lang.TestJoinSectionsTextTokenId; +import org.netbeans.lib.lexer.test.LexerTestUtilities; +import org.netbeans.lib.lexer.lang.TestJoinSectionsTopTokenId; +import org.netbeans.lib.lexer.test.ModificationTextDocument; + +/** + * Test embedded sections that should be lexed together. + * + *

+ * Top lexer recognizes TestJoinSectionsTopTokenId.TAG (text in angle brackets) + * and TestJoinSectionsTopTokenId.TEXT (everything else). + * + * TestJoinSectionsTopTokenId.TAG is branched into + * TestJoinSectionsTextTokenId.BRACES "{...}" + * and TestJoinSectionsTextTokenId.TEXT (everything else). + *

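For context, the joined embedding exercised by this test would be declared in the test's LanguageHierarchy roughly as sketched below. That the TEXT tokens carry the embedding is an assumption based on the sections observed in the removed JoinSectionsTest; LanguageEmbedding.create(language, startSkipLength, endSkipLength, joinSections) is the SPI factory method.

    // Hedged sketch of a joined embedding declaration.
    @Override
    protected LanguageEmbedding<?> embedding(Token<TestJoinSectionsTopTokenId> token,
            LanguagePath languagePath, InputAttributes inputAttributes) {
        if (token.id() == TestJoinSectionsTopTokenId.TEXT) {
            // joinSections=true: embedded parts of all TEXT tokens are lexed
            // as one continuous virtual input, producing PartType.START/MIDDLE/END
            // braces tokens across section boundaries.
            return LanguageEmbedding.create(TestJoinSectionsTextTokenId.language(), 0, 0, true);
        }
        return null;
    }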
    + * + * + * @author Miloslav Metelka + */ +public class JoinSectionsMod1Test extends NbTestCase { + + public JoinSectionsMod1Test(String testName) { + super(testName); + } + + protected void setUp() throws Exception { + } + + @Override + public PrintStream getLog() { + return System.out; +// return super.getLog(); + } + + @Override + protected Level logLevel() { + return Level.INFO; +// return super.logLevel();; + } + + public void testShortDocMod() throws Exception { + // 000000000011111111112222222222 + // 012345678901234567890123456789 + String text = "xayzc"; + ModificationTextDocument doc = new ModificationTextDocument(); + doc.insertString(0, text, null); + doc.putProperty(Language.class, TestJoinSectionsTopTokenId.language()); + LexerTestUtilities.incCheck(doc, true); // Ensure the whole embedded hierarchy gets created + + Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.FINE); // Extra logging + doc.remove(6, 1); + LexerTestUtilities.incCheck(doc, true); + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // text = "xayc"; + // \yzhk + doc.insertString(6, "yzhk", null); + LexerTestUtilities.incCheck(doc, true); + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // text = "xayyzhkc"; + doc.remove(12, 3); + LexerTestUtilities.incCheck(doc, true); + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // text = "xayyz"; + doc.insertString(12, "hkc", null); + LexerTestUtilities.incCheck(doc, true); + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // text = "xayyzhkc"; + + } + + public void testJoinSections() throws Exception { + if (true) + return; + // Turn on detailed checking +// Logger.getLogger(TokenHierarchyOperation.class.getName()).setLevel(Level.FINEST); + + // 000000000011111111112222222222 + // 012345678901234567890123456789 + String text = "a{be}fi{jm}n"; + ModificationTextDocument doc = new ModificationTextDocument(); + doc.insertString(0, text, null); + doc.putProperty(Language.class,TestJoinSectionsTopTokenId.language()); + + TokenHierarchy hi = TokenHierarchy.get(doc); + TokenSequence ts = hi.tokenSequence(); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "a{b", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "e}f", -1); + + // Get embedded tokens within TEXT tokens. There should be "a" then BRACES start "{b" then BRACES end "e}|" then "f" + LanguagePath innerLP = LanguagePath.get(TestJoinSectionsTopTokenId.language()). 
+ embedded(TestJoinSectionsTextTokenId.language()); + List> tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE); + checkInitialTokens(tsList); + + + // Use iterator for fetching token sequences + int i = 0; + for (TokenSequence ts2 : tsList) { + assertSame(ts2, tsList.get(i++)); + } + + LexerTestUtilities.assertConsistency(hi); + + // Check tokenSequenceList() with explicit offsets + // Check correct TSs bounds + tsList = hi.tokenSequenceList(innerLP, 0, 7); + assertEquals(1, tsList.size()); + tsList = hi.tokenSequenceList(innerLP, 0, 8); + assertEquals(2, tsList.size()); + + + // Do modifications + // Remove second closing brace '}' + +// Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.FINE); // Extra logging + doc.remove(8, 1); +// Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.WARNING); // End of extra logging + LexerTestUtilities.assertConsistency(hi); + LexerTestUtilities.incCheck(doc, true); + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // before: "a{be}fi{jm}n"; + // after: "a{befi{jm}n"; + // i0 i1 i2 i3 + tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE); + assertEquals(4, tsList.size()); // 2 sections + + // 1.section "a{b" + ts = tsList.get(0); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "a", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "{b", -1); + Token token = ts.token(); + assertEquals(PartType.START, token.partType()); + assertFalse(ts.moveNext()); + + // 2.section "ef" + ts = tsList.get(1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "ef", -1); + token = ts.token(); + assertEquals(PartType.MIDDLE, token.partType()); + assertFalse(ts.moveNext()); + + // 3.section "i{j" + ts = tsList.get(2); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "i{j", -1); + token = ts.token(); + assertEquals(PartType.MIDDLE, token.partType()); + assertFalse(ts.moveNext()); + + // 4.section "m}n" + ts = tsList.get(3); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "m}", -1); + token = ts.token(); + assertEquals(PartType.END, token.partType()); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "n", -1); + assertFalse(ts.moveNext()); + + + // Re-add second closing brace '}' + doc.insertString(8, "}", null); + LexerTestUtilities.assertConsistency(hi); + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // before: "a{befi{jm}n"; + // after: "a{be}fi{jm}n"; + tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE); + checkInitialTokens(tsList); + + doc.remove(0, doc.getLength()); + LexerTestUtilities.assertConsistency(hi); + ts = hi.tokenSequence(); + assertFalse(ts.moveNext()); + doc.insertString(0, text, null); + + } + + private void checkInitialTokens(List> tsList) { + // 000000000011111111112222222222 + // 012345678901234567890123456789 + // text: "a{be}fi{jm}n"; + assertEquals(4, tsList.size()); // 4 sections + + // 1.section + TokenSequence ts = tsList.get(0); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "a", -1); + assertTrue(ts.moveNext()); + 
LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "{b", -1); + Token token = ts.token(); + assertEquals(PartType.START, token.partType()); + assertFalse(ts.moveNext()); + + // 2.section + ts = tsList.get(1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "e}", -1); + token = ts.token(); + assertEquals(PartType.END, token.partType()); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "f", -1); + assertFalse(ts.moveNext()); + + // 3.section + ts = tsList.get(2); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "i", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "{j", -1); + token = ts.token(); + assertEquals(PartType.START, token.partType()); + assertFalse(ts.moveNext()); + + // 4.section + ts = tsList.get(3); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.BRACES, "m}", -1); + token = ts.token(); + assertEquals(PartType.END, token.partType()); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTextTokenId.TEXT, "n", -1); + assertFalse(ts.moveNext()); + } + +} diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/join/JoinSectionsMod2Test.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/join/JoinSectionsMod2Test.java Wed May 28 14:48:56 2008 +0200 @@ -0,0 +1,178 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. 
+ * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. + */ + +package org.netbeans.lib.lexer.test.join; + +import java.util.List; +import org.netbeans.api.lexer.Language; +import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.PartType; +import org.netbeans.lib.lexer.lang.TestJoinSectionsTopTokenId; +import org.netbeans.lib.lexer.lang.TestTokenId; +import org.netbeans.api.lexer.TokenChange; +import org.netbeans.api.lexer.TokenHierarchy; +import org.netbeans.api.lexer.TokenHierarchyEvent; +import org.netbeans.api.lexer.TokenHierarchyListener; +import org.netbeans.api.lexer.TokenSequence; +import org.netbeans.junit.NbTestCase; +import org.netbeans.lib.lexer.lang.TestJoinSectionsTextTokenId; +import org.netbeans.lib.lexer.test.LexerTestUtilities; +import org.netbeans.lib.lexer.lang.TestPlainTokenId; +import org.netbeans.lib.lexer.test.ModificationTextDocument; +import org.netbeans.spi.lexer.LanguageEmbedding; + +/** + * Test several simple lexer impls. + * + *
+ * Top lexer recognizes TestJoinSectionsTopTokenId.TAG (text in angle brackets)
+ * and TestJoinSectionsTopTokenId.TEXT (everything else).
+ *
+ * TestJoinSectionsTopTokenId.TAG is branched into
+ * TestJoinSectionsTextTokenId.BRACES "{...}"
+ * and TestJoinSectionsTextTokenId.TEXT (everything else).
+ *
    + * + * @author mmetelka + */ +public class JoinSectionsMod2Test extends NbTestCase { + + public JoinSectionsMod2Test(String testName) { + super(testName); + } + + protected void setUp() throws java.lang.Exception { + } + + protected void tearDown() throws java.lang.Exception { + } + + public void testMove() throws Exception { // TokenSequence.move() and moveIndex() + // 000000000011111111112222222222 + // 012345678901234567890123456789 + String text = "a{be}fi{}{jm}npr"; + ModificationTextDocument doc = new ModificationTextDocument(); + doc.insertString(0, text, null); + doc.putProperty(Language.class,TestJoinSectionsTopTokenId.language()); + + TokenHierarchy hi = TokenHierarchy.get(doc); + TokenSequence ts = hi.tokenSequence(); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "a{b", 0); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", 3); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "e}f", 7); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", 10); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "i{}{j", 14); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", 19); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "m}n", 23); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "p", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "r", -1); + assertFalse(ts.moveNext()); + + // Check regular TS.embedded() + ts.moveStart(); + assertTrue(ts.moveNext()); // Over "a{b" + TokenSequence tse = ts.embedded(); + assertTrue(tse.moveNext()); + LexerTestUtilities.assertTokenEquals(tse,TestJoinSectionsTextTokenId.TEXT, "a", 0); + assertTrue(tse.moveNext()); + LexerTestUtilities.assertTokenEquals(tse,TestJoinSectionsTextTokenId.BRACES, "{b", 1); + assertEquals(tse.token().partType(), PartType.START); + assertFalse(tse.moveNext()); + + assertTrue(ts.moveNext()); // Over "" + assertTrue(ts.moveNext()); // Over "e}f" + TokenSequence tse2 = ts.embedded(); + assertTrue(tse2.moveNext()); + LexerTestUtilities.assertTokenEquals(tse2,TestJoinSectionsTextTokenId.BRACES, "e}", 7); + assertEquals(tse2.token().partType(), PartType.END); + assertTrue(tse2.moveNext()); + LexerTestUtilities.assertTokenEquals(tse2,TestJoinSectionsTextTokenId.TEXT, "f", 9); + assertEquals(tse2.token().partType(), PartType.START); + assertFalse(tse2.moveNext()); + + assertTrue(ts.moveNext()); // Over "" + assertTrue(ts.moveNext()); // Over "i{}{j" + TokenSequence tse3 = ts.embedded(); + assertTrue(tse3.moveNext()); + LexerTestUtilities.assertTokenEquals(tse3,TestJoinSectionsTextTokenId.TEXT, "i", 14); + assertEquals(tse3.token().partType(), PartType.END); + assertTrue(tse3.moveNext()); + LexerTestUtilities.assertTokenEquals(tse3,TestJoinSectionsTextTokenId.BRACES, "{}", 15); + assertEquals(tse3.token().partType(), PartType.COMPLETE); + assertTrue(tse3.moveNext()); + 
LexerTestUtilities.assertTokenEquals(tse3,TestJoinSectionsTextTokenId.BRACES, "{j", 17); + assertEquals(tse3.token().partType(), PartType.START); + assertFalse(tse3.moveNext()); + + + // Check TS.embeddedJoin() + TokenSequence tsej = ts.embeddedJoined(); + assertEquals(2, tsej.index()); + assertTrue(tsej.moveNext()); + int o = tsej.offset(); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.TEXT, "fi", 9); + assertEquals(9, tsej.token().offset(null)); // Assert also token.offset() besides TS.offset() + assertEquals(tsej.token().partType(), PartType.COMPLETE); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.BRACES, "{}", 15); + assertEquals(tsej.token().partType(), PartType.COMPLETE); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.BRACES, "{jm}", 17); + assertEquals(tsej.token().partType(), PartType.COMPLETE); +// assertFalse(tsej.moveNext()); + + } + +} diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/join/JoinSectionsPositioningTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/join/JoinSectionsPositioningTest.java Wed May 28 14:48:56 2008 +0200 @@ -0,0 +1,250 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of either the GNU + * General Public License Version 2 only ("GPL") or the Common + * Development and Distribution License("CDDL") (collectively, the + * "License"). You may not use this file except in compliance with the + * License. You can obtain a copy of the License at + * http://www.netbeans.org/cddl-gplv2.html + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the + * specific language governing permissions and limitations under the + * License. When distributing the software, include this License Header + * Notice in each file and include the License file at + * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the GPL Version 2 section of the License file that + * accompanied this code. If applicable, add the following below the + * License Header, with the fields enclosed by brackets [] replaced by + * your own identifying information: + * "Portions Copyrighted [year] [name of copyright owner]" + * + * Contributor(s): + * + * The Original Software is NetBeans. The Initial Developer of the Original + * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun + * Microsystems, Inc. All Rights Reserved. + * + * If you wish your version of this file to be governed by only the CDDL + * or only the GPL Version 2, indicate your decision by adding + * "[Contributor] elects to include this software in this distribution + * under the [CDDL or GPL Version 2] license." If you do not indicate a + * single choice of license, a recipient has the option to distribute + * your version of this file under either the CDDL, the GPL Version 2 or + * to extend the choice of license to its licensees as provided above. + * However, if you add GPL Version 2 code and therefore, elected the GPL + * Version 2 license, then the option applies only if the new code is + * made subject to such option by the copyright holder. 
+ */ + +package org.netbeans.lib.lexer.test.join; + +import java.util.List; +import org.netbeans.api.lexer.Language; +import org.netbeans.api.lexer.LanguagePath; +import org.netbeans.api.lexer.PartType; +import org.netbeans.lib.lexer.lang.TestJoinSectionsTopTokenId; +import org.netbeans.lib.lexer.lang.TestTokenId; +import org.netbeans.api.lexer.TokenChange; +import org.netbeans.api.lexer.TokenHierarchy; +import org.netbeans.api.lexer.TokenHierarchyEvent; +import org.netbeans.api.lexer.TokenHierarchyListener; +import org.netbeans.api.lexer.TokenSequence; +import org.netbeans.junit.NbTestCase; +import org.netbeans.lib.lexer.lang.TestJoinSectionsTextTokenId; +import org.netbeans.lib.lexer.test.LexerTestUtilities; +import org.netbeans.lib.lexer.lang.TestPlainTokenId; +import org.netbeans.lib.lexer.test.ModificationTextDocument; +import org.netbeans.spi.lexer.LanguageEmbedding; + +/** + * Test several simple lexer impls. + * + *
+ * Top lexer recognizes TestJoinSectionsTopTokenId.TAG (text in angle brackets)
+ * and TestJoinSectionsTopTokenId.TEXT (everything else).
+ *
+ * TestJoinSectionsTopTokenId.TAG is branched into
+ * TestJoinSectionsTextTokenId.BRACES "{...}"
+ * and TestJoinSectionsTextTokenId.TEXT (everything else).
+ *
    + * + * @author mmetelka + */ +public class JoinSectionsPositioningTest extends NbTestCase { + + public JoinSectionsPositioningTest(String testName) { + super(testName); + } + + protected void setUp() throws java.lang.Exception { + } + + protected void tearDown() throws java.lang.Exception { + } + + private ModificationTextDocument initDocument() throws Exception { + // 000000000011111111112222222222 + // 012345678901234567890123456789 + String text = "a{be}fi{}{jm}npr"; + ModificationTextDocument doc = new ModificationTextDocument(); + doc.insertString(0, text, null); + doc.putProperty(Language.class,TestJoinSectionsTopTokenId.language()); + return doc; + } + + public void testTokensAndEmbeddings() throws Exception { // TokenSequence.move() and moveIndex() + ModificationTextDocument doc = initDocument(); + TokenHierarchy hi = TokenHierarchy.get(doc); + TokenSequence ts = hi.tokenSequence(); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "a{b", 0); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", 3); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "e}f", 7); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", 10); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "i{}{j", 14); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", 19); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "m}n", 23); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "p", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TAG, "", -1); + assertTrue(ts.moveNext()); + LexerTestUtilities.assertTokenEquals(ts,TestJoinSectionsTopTokenId.TEXT, "r", -1); + assertFalse(ts.moveNext()); + assertEquals(11, ts.tokenCount()); + + // Check regular TS.embedded() + ts.moveStart(); + assertTrue(ts.moveNext()); // Over "a{b" + TokenSequence tse = ts.embedded(); + assertTrue(tse.moveNext()); + LexerTestUtilities.assertTokenEquals(tse,TestJoinSectionsTextTokenId.TEXT, "a", 0); + assertTrue(tse.moveNext()); + LexerTestUtilities.assertTokenEquals(tse,TestJoinSectionsTextTokenId.BRACES, "{b", 1); + assertEquals(tse.token().partType(), PartType.START); + assertFalse(tse.moveNext()); + + assertTrue(ts.moveNext()); // Over "" + assertTrue(ts.moveNext()); // Over "e}f" + TokenSequence tse2 = ts.embedded(); + assertTrue(tse2.moveNext()); + LexerTestUtilities.assertTokenEquals(tse2,TestJoinSectionsTextTokenId.BRACES, "e}", 7); + assertEquals(tse2.token().partType(), PartType.END); + assertTrue(tse2.moveNext()); + LexerTestUtilities.assertTokenEquals(tse2,TestJoinSectionsTextTokenId.TEXT, "f", 9); + assertEquals(tse2.token().partType(), PartType.START); + assertFalse(tse2.moveNext()); + + assertTrue(ts.moveNext()); // Over "" + assertTrue(ts.moveNext()); // Over "i{}{j" + TokenSequence tse3 = ts.embedded(); + assertTrue(tse3.moveNext()); + LexerTestUtilities.assertTokenEquals(tse3,TestJoinSectionsTextTokenId.TEXT, "i", 14); + assertEquals(tse3.token().partType(), PartType.END); + assertTrue(tse3.moveNext()); + 
LexerTestUtilities.assertTokenEquals(tse3,TestJoinSectionsTextTokenId.BRACES, "{}", 15); + assertEquals(tse3.token().partType(), PartType.COMPLETE); + assertTrue(tse3.moveNext()); + LexerTestUtilities.assertTokenEquals(tse3,TestJoinSectionsTextTokenId.BRACES, "{j", 17); + assertEquals(tse3.token().partType(), PartType.START); + assertFalse(tse3.moveNext()); + + + // Check TS.embeddedJoin() + TokenSequence tsej = ts.embeddedJoined(); + assertEquals(2, tsej.index()); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.TEXT, "fi", 9); + assertEquals(9, tsej.token().offset(null)); // Assert also token.offset() besides TS.offset() + assertEquals(tsej.token().partType(), PartType.COMPLETE); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.BRACES, "{}", 15); + assertEquals(tsej.token().partType(), PartType.COMPLETE); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.BRACES, "{jm}", 17); + assertEquals(tsej.token().partType(), PartType.COMPLETE); + + tsej.moveStart(); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.TEXT, "a", 0); + assertEquals(0, tsej.token().offset(null)); // Assert also token.offset() besides TS.offset() + assertEquals(tsej.token().partType(), PartType.COMPLETE); + assertTrue(tsej.moveNext()); + LexerTestUtilities.assertTokenEquals(tsej,TestJoinSectionsTextTokenId.BRACES, "{be}", 1); + assertEquals(1, tsej.token().offset(null)); // Assert also token.offset() besides TS.offset() + assertEquals(tsej.token().partType(), PartType.COMPLETE); +// assertFalse(tsej.moveNext()); + + } + + public void testTSMove() throws Exception { + ModificationTextDocument doc = initDocument(); + TokenHierarchy hi = TokenHierarchy.get(doc); + TokenSequence ts = hi.tokenSequence(); + assertEquals(1, ts.move(8)); + ts.moveStart(); + assertTrue(ts.moveNext()); + + // Test TS.move() on embeddedJoin() + TokenSequence tsej = ts.embeddedJoined(); + assertEquals(-3, tsej.move(-3)); // Token starts at offset == 1 + assertEquals(0, tsej.index()); + + assertEquals(7, tsej.move(8)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(6, tsej.move(7)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(5, tsej.move(6)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(2, tsej.move(3)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(1, tsej.move(2)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(1, tsej.move(2)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(1, tsej.move(2)); // Token starts at offset == 1 + assertEquals(1, tsej.index()); + + assertEquals(1, tsej.move(16)); // Token starts at offset == 15 + assertEquals(3, tsej.index()); + + assertEquals(0, tsej.move(15)); // Token starts at offset == 15 + assertEquals(3, tsej.index()); + + assertEquals(0, tsej.move(17)); // Token starts at offset == 15 + assertEquals(4, tsej.index()); + + } + + public void testShortDoc() throws Exception { + // 000000000011111111112222222222 + // 012345678901234567890123456789 + String text = "ac"; + ModificationTextDocument doc = new ModificationTextDocument(); + doc.insertString(0, text, null); + doc.putProperty(Language.class, TestJoinSectionsTopTokenId.language()); + TokenHierarchy hi = TokenHierarchy.get(doc); + TokenSequence 
ts = hi.tokenSequence(); + assertTrue(ts.moveNext()); + ts.embedded(); // Creates JTL + } + +} diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/CustomEmbeddingTest.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/CustomEmbeddingTest.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/CustomEmbeddingTest.java Wed May 28 14:48:56 2008 +0200 @@ -106,7 +106,7 @@ assertEquals(0, etc.embeddedChangeCount()); // Test the contents of the embedded sequence - TokenSequence ets = ts.embedded(); + TokenSequence ets = ts.embedded(); // Over "// line comment" assertTrue(ets.moveNext()); LexerTestUtilities.assertTokenEquals(ets,TestTokenId.IDENTIFIER, "line", 18); assertTrue(ets.moveNext()); @@ -158,7 +158,7 @@ // Check token sequence list - // Create custm embedding again + // Create custom embedding again assertTrue(ts.createEmbedding(TestTokenId.language(), 2, 2)); LanguagePath lpe = LanguagePath.get(TestTokenId.language()).embedded(TestTokenId.language()); List> tsl = hi.tokenSequenceList(lpe, 0, Integer.MAX_VALUE); diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerBatchTest.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerBatchTest.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerBatchTest.java Wed May 28 14:48:56 2008 +0200 @@ -174,7 +174,7 @@ } tm = System.currentTimeMillis() - tm; assertTrue("Timeout tm = " + tm + "msec", tm < 1000); // Should be fast - System.out.println("Lexed input " + text.length() + System.out.println("SimpleLexerBatchTest.testPerf(): Lexed input " + text.length() + " chars long and created " + cntr + " tokens in " + tm + " ms."); } diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerIncTest.java --- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerIncTest.java Wed May 28 13:50:31 2008 +0200 +++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerIncTest.java Wed May 28 14:48:56 2008 +0200 @@ -41,14 +41,16 @@ package org.netbeans.lib.lexer.test.simple; +import java.io.PrintStream; +import java.util.logging.Level; import org.netbeans.lib.lexer.lang.TestTokenId; import java.util.ConcurrentModificationException; +import java.util.logging.Logger; import javax.swing.text.Document; -import junit.framework.TestCase; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.TokenHierarchy; -import org.netbeans.api.lexer.TokenId; import org.netbeans.api.lexer.TokenSequence; +import org.netbeans.junit.NbTestCase; import org.netbeans.lib.lexer.test.LexerTestUtilities; import org.netbeans.lib.lexer.test.ModificationTextDocument; @@ -57,16 +59,30 @@ * * @author mmetelka */ -public class SimpleLexerIncTest extends TestCase { +public class SimpleLexerIncTest extends NbTestCase { public SimpleLexerIncTest(String testName) { super(testName); } + @Override protected void setUp() throws java.lang.Exception { } + @Override protected void tearDown() throws java.lang.Exception { + } + + @Override + public PrintStream getLog() { + return System.out; +// return super.getLog(); + } + + @Override + protected Level logLevel() { + return Level.INFO; +// return super.logLevel();; } public void test() throws Exception { @@ -75,11 +91,19 @@ doc.putProperty(Language.class,TestTokenId.language()); TokenHierarchy hi = TokenHierarchy.get(doc); assertNotNull("Null token hierarchy for document", hi); + + // Check insertion 
of text that produces token with LA=0 + doc.insertString(0, "+", null); + LexerTestUtilities.incCheck(doc, false); + doc.remove(0, doc.getLength()); + LexerTestUtilities.incCheck(doc, false); + TokenSequence ts = hi.tokenSequence(); assertFalse(ts.moveNext()); // Insert text into document String commentText = "/* test comment */"; + // 0123456789 String text = "abc+uv-xy +-+" + commentText + "def"; int commentTextStartOffset = 13; doc.insertString(0, text, null); @@ -114,7 +138,6 @@ assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "def", offset); assertFalse(ts.moveNext()); - LexerTestUtilities.incCheck(doc, false); // Check TokenSequence.move() @@ -133,7 +156,7 @@ assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "abc", 0); - relOffset = ts.move(5); // to first token "abc" + relOffset = ts.move(5); // to "uv" assertEquals(relOffset, 1); assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "uv", 4); @@ -148,6 +171,8 @@ } catch (ConcurrentModificationException e) { // Expected exception } + LexerTestUtilities.incCheck(doc, false); + ts = hi.tokenSequence(); assertTrue(ts.moveNext()); @@ -156,6 +181,8 @@ // Remove added 'd' to become "abc" again doc.remove(2, 1); // should be "abc" again + LexerTestUtilities.incCheck(doc, false); + ts = hi.tokenSequence(); assertTrue(ts.moveNext()); @@ -165,22 +192,23 @@ // Now insert right at the end of first token - identifier with lookahead 1 doc.insertString(3, "x", null); // should become "abcx" + LexerTestUtilities.incCheck(doc, false); ts = hi.tokenSequence(); assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "abcx", 0); - LexerTestUtilities.incCheck(doc, false); doc.remove(3, 1); // return back to "abc" + LexerTestUtilities.incCheck(doc, false); ts = hi.tokenSequence(); assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "abc", 0); - LexerTestUtilities.incCheck(doc, false); // Now insert right at the end of "+" token - operator with lookahead 1 (because of "+-+" operator) doc.insertString(4, "z", null); // should become "abc" "+" "zuv" + LexerTestUtilities.incCheck(doc, false); ts = hi.tokenSequence(); assertTrue(ts.moveNext()); @@ -191,14 +219,13 @@ LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "zuv", 4); assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.MINUS, "-", 7); - LexerTestUtilities.incCheck(doc, false); - + doc.remove(4, 1); // return back to "abc" "+" "uv" - LexerTestUtilities.incCheck(doc, false); // Now insert right after "-" - operator with lookahead 0 doc.insertString(7, "z", null); + LexerTestUtilities.incCheck(doc, false); ts = hi.tokenSequence(); assertTrue(ts.moveNext()); @@ -211,10 +238,8 @@ LexerTestUtilities.assertTokenEquals(ts,TestTokenId.MINUS, "-", 6); assertTrue(ts.moveNext()); LexerTestUtilities.assertTokenEquals(ts,TestTokenId.IDENTIFIER, "zxy", 7); - LexerTestUtilities.incCheck(doc, false); doc.remove(7, 1); // return back to "abc" "+" "uv" - LexerTestUtilities.incCheck(doc, false); // Now insert between "+-" and "+" in "+-+" - operator with lookahead 0 @@ -227,7 +252,9 @@ doc.insertString(doc.getLength(), "-", null); LexerTestUtilities.incCheck(doc, false); // Insert again "-" at the end of the document (now lookahead of preceding is zero) +// Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.FINE); // Extra logging 
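A note on the commented-out Logger lines above and below, and on the getLog()/logLevel() overrides these tests now add: with plain java.util.logging, raising a logger to Level.FINE is not enough on its own, because the default console handler passes only INFO and above; NbTestCase wires up a handler according to logLevel(), which is why the tests override it. A minimal standalone sketch of the same idiom (the explicit ConsoleHandler wiring and the class name are illustrative, not part of this patch):

    import java.util.logging.ConsoleHandler;
    import java.util.logging.Level;
    import java.util.logging.Logger;

    public class ExtraLexerLogging {
        public static void main(String[] args) {
            Logger log = Logger.getLogger("org.netbeans.lib.lexer.inc.TokenListUpdater");
            ConsoleHandler handler = new ConsoleHandler();
            handler.setLevel(Level.FINE);   // the default console handler stops at INFO
            log.addHandler(handler);        // without a FINE-capable handler nothing is printed
            log.setLevel(Level.FINE);       // start of extra logging
            log.fine("extra logging visible now");
            log.setLevel(Level.WARNING);    // end of extra logging
        }
    }
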
 doc.insertString(doc.getLength(), "-", null);
+// Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.WARNING); // End of extra logging
 LexerTestUtilities.incCheck(doc, false);
 // Insert again "+-+" at the end of the document (now lookahead of preceding is zero)
 doc.insertString(doc.getLength(), "+-+", null);
diff -r 06a7890f802e lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerRandomTest.java
--- a/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerRandomTest.java Wed May 28 13:50:31 2008 +0200
+++ b/lexer/test/unit/src/org/netbeans/lib/lexer/test/simple/SimpleLexerRandomTest.java Wed May 28 14:48:56 2008 +0200
@@ -41,8 +41,11 @@
+import java.io.PrintStream;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.netbeans.junit.NbTestCase;
 import org.netbeans.lib.lexer.lang.TestTokenId;
-import junit.framework.TestCase;
 import org.netbeans.lib.lexer.test.FixedTextDescriptor;
 import org.netbeans.lib.lexer.test.LexerTestUtilities;
 import org.netbeans.lib.lexer.test.RandomCharDescriptor;
@@ -55,7 +58,7 @@
  *
  * @author mmetelka
  */
-public class SimpleLexerRandomTest extends TestCase {
+public class SimpleLexerRandomTest extends NbTestCase {

     public SimpleLexerRandomTest(String testName) {
         super(testName);
@@ -69,6 +72,18 @@
     protected void tearDown() throws java.lang.Exception {
     }

+    @Override
+    public PrintStream getLog() {
+        return System.out;
+//        return super.getLog();
+    }
+
+    @Override
+    protected Level logLevel() {
+        return Level.INFO;
+//        return super.logLevel();
+    }
+
     public void testRandom() throws Exception {
         test(0);
     }
@@ -78,7 +93,7 @@
     }

     private void test(long seed) throws Exception {
-        TestRandomModify randomModify = new TestRandomModify(seed);
+        TestRandomModify randomModify = new TestRandomModify(seed, this.getClass());
         randomModify.setLanguage(TestTokenId.language());

         //randomModify.setDebugOperation(true);
@@ -87,28 +102,61 @@

         // Check for incorrect lookahead counting problem
         // after one of the larger updates of the LexerInputOperation's code
-        randomModify.insertText(0, "+--+"); // "+"[2]; "-"[0]; "-"[0]; "+"[1];
+        randomModify.insertText(0, "+--+"); // "+"[2]; "-"[1]; "-"[0]; "+"[1];
         randomModify.removeText(2, 1); // "+-+": "+-+"[0];
-        randomModify.clearDocument();

-        // Check that token list updater respects that the lookaheads
+        // Check for error with querying laState.lookahead(-1) after implementing lexer input
+//        Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.FINE); // Extra logging
+        randomModify.insertText(0, "--");
+        randomModify.insertText(0, "-");
+//        Logger.getLogger(org.netbeans.lib.lexer.inc.TokenListUpdater.class.getName()).setLevel(Level.WARNING); // End of extra logging
+        randomModify.clearDocument();
+
+        // Check for incorrect backward elimination of extra relexed tokens.
+        // This required establishing lowestMatchIndex in TokenListUpdater.relex().
+        randomModify.insertText(0, "+ +");
+        randomModify.insertText(2, "\n-\n");
+        randomModify.clearDocument();
+
+
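The "token"[n] notation used in the comments above and below pairs each token's text with its lookahead: the number of characters past the token's end that the lexer had to read before committing to the token. In the test language "+-+" is a single operator, so a lone "+" needs lookahead 2 to rule that longer match out. A standalone sketch under the patch's own test harness; the LexerTestUtilities.lookahead() accessor is assumed here, and only the expected output is taken from the comments:

    import org.netbeans.api.lexer.TokenHierarchy;
    import org.netbeans.api.lexer.TokenSequence;
    import org.netbeans.lib.lexer.lang.TestTokenId;
    import org.netbeans.lib.lexer.test.LexerTestUtilities;

    public class LookaheadNotationDemo {
        public static void main(String[] args) {
            // Batch-lex "+--+" with the same test language the random test uses.
            TokenHierarchy<?> hi = TokenHierarchy.create("+--+", TestTokenId.language());
            TokenSequence<TestTokenId> ts = hi.tokenSequence(TestTokenId.language());
            while (ts.moveNext()) {
                // Print each token in the "text"[lookahead] notation of the comments.
                System.out.println("\"" + ts.token().text() + "\"["
                        + LexerTestUtilities.lookahead(ts) + "]");
            }
            // Expected, per the corrected comment above: "+"[2]; "-"[1]; "-"[0]; "+"[1];
        }
    }
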
+        // -------------------- SAME-LOOKAHEAD REQUIREMENTS -------------------------
+        // Check that the token list updater respects the rule
+        // that the lookahead of the incrementally created tokens
+        // is the same as in regular batch lexing.
+        // This may not be strictly necessary for correctness but very beneficial because
+        // then the incrementally patched tokens can be compared after each modification
+        // with a token list obtained by batch lexing and everything including
+        // lookaheads and states can be required to be the same.
+        //
+        // -------------------- SAME-LOOKAHEAD REQUIREMENT #1 -------------------------
+        // Check that when the token list updater finds a match (token boundary and states match)
+        // that also the lookaheads
 // of subsequent tokens (already present in the token list)
 // correspond to the lookahead of the relexed token.
 // In the following example a "+-+" token must be created.
-        randomModify.insertText(0, "---+"); // "-"[0]; "-"[0]; "-"[0]; "+"[1];
-        randomModify.insertText(1, "+");
-        randomModify.removeText(3, 1);
-
+        randomModify.insertText(0, "---+"); // "-"[0]; "-"[0]; "-"[0]; "+"[1]; <- see the second token LA=0
+        randomModify.insertText(1, "+"); // "-+--+": "-"[0]; "+"[2]; "-"[1]; "-"[0]; "+"[1]; <- seems only "+"[2] was added
+        // BUT note the next token "-"[1] has to be relexed too since the original had LA=0
+
+        // Now in addition check that "+-+" will be created.
+        randomModify.removeText(3, 1); // "-+-+": "-"[0]; "+-+"[0]
 randomModify.clearDocument();

-        // Check that the token list updater checks respects the rule
-        // that the lookahead of the incrementally created tokens
-        // is the same like in a regular batch lexing.
-        randomModify.insertText(0, "+--+--");
-        randomModify.removeText(2, 1);
+        // -------------------- SAME-LOOKAHEAD REQUIREMENT #2 -------------------------
+        // Here check that an original token after the match point would not have an unnecessarily high LA.
+        // This could happen if the original token before the match point had a LA longer than
+        // the length of the token that follows it (the one right after the match point) and so it would affect
+        // the LA of that next token too. Now if the newly relexed token (before the match point) had a small LA,
+        // the retained token after the match point would still hold the extra LA unnecessarily.
+        randomModify.insertText(0, "+--+--"); // "+"[2]; "-"[1]; "-"[0]; "+"[2]; "-"[1]; "-"[0]
+        randomModify.removeText(2, 1); // "+-+--": Without extra check: "+-+"[0]; "-"[1]; "-"[0]
 randomModify.clearDocument();
+        // BUT in batch lexing the second token would have LA=0.
+        // A potential fix is to check that when lexing stops that the
+
+
 // Check for the previous case but this time the relexing would normally
 // be skipped but this would lead to lookahead 1 for the "-" token
@@ -116,21 +164,20 @@
 randomModify.insertText(0, "-+--"); // "-"[0]; "+"[2]; "-"[1]; "-"[0];
 // Without extra care it would become "-"[0]; "-"[1]; "-"[0];
 randomModify.removeText(1, 1); // "---": "-"[0]; "-"[0]; "-"[0];
-        randomModify.clearDocument();

 // Similar case to the previous one but with more tokens
 randomModify.insertText(0, "-+-++--");
 randomModify.removeText(1, 4);
-        randomModify.clearDocument();
+
+
 // Check for the case when token validation cannot be performed
 // because although the length of the removal is less than
 // the "+-+" token's length the removal spans token boundaries
 randomModify.insertText(0, "-+-+ --");
 randomModify.removeText(3, 2);
-        randomModify.clearDocument();
@@ -161,7 +208,7 @@
 randomModify.test(
         new RandomModifyDescriptor[] {
-            new RandomModifyDescriptor(200, plusMinusTextProvider,
+            new RandomModifyDescriptor(200, textProvider,
                 0.2, 0.2, 0.1, 0.2, 0.2, 0.0, 0.0), // snapshots create/destroy
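Since the changeset revolves around the new joined-sections lexing, here is a sketch of the consumer side of the API that the join tests above exercise. Everything mirrors the tests themselves (the two TestJoinSections* harness languages, a Swing Document with the top language plugged in); only the dump method is new to this note:

    import java.util.List;
    import javax.swing.text.Document;
    import org.netbeans.api.lexer.LanguagePath;
    import org.netbeans.api.lexer.Token;
    import org.netbeans.api.lexer.TokenHierarchy;
    import org.netbeans.api.lexer.TokenSequence;
    import org.netbeans.lib.lexer.lang.TestJoinSectionsTextTokenId;
    import org.netbeans.lib.lexer.lang.TestJoinSectionsTopTokenId;

    public class JoinedSectionsDump {
        static void dump(Document doc) {
            TokenHierarchy<Document> hi = TokenHierarchy.get(doc);
            LanguagePath innerLP = LanguagePath.get(TestJoinSectionsTopTokenId.language())
                    .embedded(TestJoinSectionsTextTokenId.language());
            // One physical TokenSequence per embedded section of the document.
            List<TokenSequence<?>> tsList = hi.tokenSequenceList(innerLP, 0, Integer.MAX_VALUE);
            for (TokenSequence<?> ts : tsList) {
                while (ts.moveNext()) {
                    Token<?> t = ts.token();
                    // partType() reports whether the token is COMPLETE or just the
                    // START/MIDDLE/END part of a token joined across sections.
                    System.out.println(t.id() + " \"" + t.text() + "\" " + t.partType());
                }
            }
        }
    }

Where this per-section view yields START/MIDDLE/END parts, calling TokenSequence.embeddedJoined() on a top-level token instead gives the logical view in which those parts fuse into COMPLETE tokens with joined text and offsets, which is exactly what JoinSectionsMod2Test and JoinSectionsPositioningTest assert.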