diff --git a/editor.lib/src/org/netbeans/editor/FinderFactory.java b/editor.lib/src/org/netbeans/editor/FinderFactory.java
--- a/editor.lib/src/org/netbeans/editor/FinderFactory.java
+++ b/editor.lib/src/org/netbeans/editor/FinderFactory.java
@@ -135,7 +135,16 @@
if (buffer[offset] == '\t') {
visCol = (visCol + tabSize) / tabSize * tabSize;
} else {
- visCol++;
+ // #17356
+ int codePoint;
+ if (Character.isHighSurrogate(buffer[offset])) {
+ codePoint = Character.toCodePoint(buffer[offset], buffer[offset + 1]);
+ offset++;
+ } else {
+ codePoint = buffer[offset];
+ }
+ int w = WcwdithUtil.wcwidth(codePoint);
+ visCol += w > 0 ? w : 0;
}
offset++;
}
@@ -200,7 +209,19 @@
found = true;
return bufferStartPos + offset;
default:
- curVisCol++;
+ {
+ // #17356
+ int codePoint;
+ if (Character.isHighSurrogate(buffer[offset])) {
+ codePoint = Character.toCodePoint(buffer[offset], buffer[offset + 1]);
+ offset++;
+ } else {
+ codePoint = buffer[offset];
+ }
+ int w = WcwdithUtil.wcwidth(codePoint);
+ curVisCol += w > 0 ? w : 0;
+ }
+ break;
}
offset++;
}
diff --git a/editor.lib/src/org/netbeans/editor/WcwdithUtil.java b/editor.lib/src/org/netbeans/editor/WcwdithUtil.java
new file mode 100644
--- /dev/null
+++ b/editor.lib/src/org/netbeans/editor/WcwdithUtil.java
@@ -0,0 +1,332 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
+ *
+ * The contents of this file are subject to the terms of either the GNU
+ * General Public License Version 2 only ("GPL") or the Common
+ * Development and Distribution License("CDDL") (collectively, the
+ * "License"). You may not use this file except in compliance with the
+ * License. You can obtain a copy of the License at
+ * http://www.netbeans.org/cddl-gplv2.html
+ * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
+ * specific language governing permissions and limitations under the
+ * License. When distributing the software, include this License Header
+ * Notice in each file and include the License file at
+ * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the GPL Version 2 section of the License file that
+ * accompanied this code. If applicable, add the following below the
+ * License Header, with the fields enclosed by brackets [] replaced by
+ * your own identifying information:
+ * "Portions Copyrighted [year] [name of copyright owner]"
+ *
+ * Contributor(s):
+ *
+ * The Original Software is NetBeans. The Initial Developer of the Original
+ * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
+ * Microsystems, Inc. All Rights Reserved.
+ *
+ * If you wish your version of this file to be governed by only the CDDL
+ * or only the GPL Version 2, indicate your decision by adding
+ * "[Contributor] elects to include this software in this distribution
+ * under the [CDDL or GPL Version 2] license." If you do not indicate a
+ * single choice of license, a recipient has the option to distribute
+ * your version of this file under either the CDDL, the GPL Version 2 or
+ * to extend the choice of license to its licensees as provided above.
+ * However, if you add GPL Version 2 code and therefore, elected the GPL
+ * Version 2 license, then the option applies only if the new code is
+ * made subject to such option by the copyright holder.
+ */
+
+package org.netbeans.editor;
+
+/**
+ * This is an implementation of wcwidth() and wcswidth() (defined in
+ * IEEE Std 1002.1-2001) for Unicode.
+ *
+ *
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+ *
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
In fixed-width output devices, Latin characters all occupy a single + * "cell" position of equal width, whereas ideographic CJK characters + * occupy two such cells. Interoperability between terminal-line + * applications and (teletype-style) character terminals using the + * UTF-8 encoding requires agreement on which character should advance + * the cursor by how many cell positions. No established formal + * standards exist at present on which Unicode character shall occupy + * how many cell positions on character terminals. These routines are + * a first attempt of defining such behavior based on simple rules + * applied to data provided by the Unicode Consortium.
+ * + *For some graphical characters, the Unicode standard explicitly + * defines a character-cell width via the definition of the East Asian + * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. + * In all these cases, there is no ambiguity about which width a + * terminal shall use. For characters in the East Asian Ambiguous (A) + * class, the width choice depends purely on a preference of backward + * compatibility with either historic CJK or Western practice. + * Choosing single-width for these characters is easy to justify as + * the appropriate long-term solution, as the CJK practice of + * displaying these characters as double-width comes from historic + * implementation simplicity (8-bit encoded characters were displayed + * single-width and 16-bit ones double-width, even for Greek, + * Cyrillic, etc.) and not any typographic considerations.
+ * + *Much less clear is the choice of width for the Not East Asian + * (Neutral) class. Existing practice does not dictate a width for any + * of these characters. It would nevertheless make sense + * typographically to allocate two character cells to characters such + * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be + * represented adequately with a single-width glyph. The following + * routines at present merely assign a single-cell width to all + * neutral characters, in the interest of simplicity. This is not + * entirely satisfactory and should be reconsidered before + * establishing a formal standard in this area. At the moment, the + * decision which Not East Asian (Neutral) characters should be + * represented by double-width glyphs cannot yet be answered by + * applying a simple rule from the Unicode database content. Setting + * up a proper standard for the behavior of UTF-8 character terminals + * will require a careful analysis not only of each Unicode character, + * but also of each presentation form, something the author of these + * routines has avoided to do so far.
+ * + *+ * http://www.unicode.org/unicode/reports/tr11/
+ * + *Markus Kuhn -- 2007-05-26 (Unicode 5.0)
+ * + *Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software.
+ * + * Latest version: + * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + * + * @author johnsonlau@netbeans.org + */ +public class WcwdithUtil { + + /** sorted list of non-overlapping intervals of non-spacing characters.*/ + /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ + static final int[][] nonSpacingCharTable = { + {0x0300, 0x036F}, {0x0483, 0x0486}, {0x0488, 0x0489}, + {0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, + {0x05C4, 0x05C5}, {0x05C7, 0x05C7}, {0x0600, 0x0603}, + {0x0610, 0x0615}, {0x064B, 0x065E}, {0x0670, 0x0670}, + {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, + {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A}, + {0x07A6, 0x07B0}, {0x07EB, 0x07F3}, {0x0901, 0x0902}, + {0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D}, + {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0981}, + {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, + {0x09E2, 0x09E3}, {0x0A01, 0x0A02}, {0x0A3C, 0x0A3C}, + {0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, + {0x0A70, 0x0A71}, {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, + {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, + {0x0AE2, 0x0AE3}, {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, + {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43}, {0x0B4D, 0x0B4D}, + {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0}, + {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48}, + {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0CBC, 0x0CBC}, + {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD}, + {0x0CE2, 0x0CE3}, {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, + {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, + {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, + {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, + {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, + {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, + {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, + {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, + {0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039}, + {0x1058, 0x1059}, {0x1160, 0x11FF}, {0x135F, 0x135F}, + {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, + {0x1772, 0x1773}, {0x17B4, 0x17B5}, {0x17B7, 0x17BD}, + {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, + {0x180B, 0x180D}, {0x18A9, 0x18A9}, {0x1920, 0x1922}, + {0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B}, + {0x1A17, 0x1A18}, {0x1B00, 0x1B03}, {0x1B34, 0x1B34}, + {0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42}, + {0x1B6B, 0x1B73}, {0x1DC0, 0x1DCA}, {0x1DFE, 0x1DFF}, + {0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x2063}, + {0x206A, 0x206F}, {0x20D0, 0x20EF}, {0x302A, 0x302F}, + {0x3099, 0x309A}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, + {0xA825, 0xA826}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, + {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, + {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, + {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F}, {0x1D167, 0x1D169}, + {0x1D173, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, + {0x1D242, 0x1D244}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}, + {0xE0100, 0xE01EF} + }; + + /** + * Returns defined column width of a Unicode character. + * + *The following functions defines the column width of an ISO 10646 + * character as follows:
+ * + *