Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions bundles/org.eclipse.jface.text/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Manifest-Version: 1.0
Bundle-ManifestVersion: 2
Bundle-Name: %pluginName
Bundle-SymbolicName: org.eclipse.jface.text
Bundle-Version: 3.31.0.qualifier
Bundle-Version: 3.31.100.qualifier
Bundle-Vendor: %providerName
Bundle-Localization: plugin
Export-Package:
Expand Down Expand Up @@ -38,7 +38,6 @@ Require-Bundle:
org.eclipse.text;bundle-version="[3.8.0,4.0.0)";visibility:=reexport,
org.eclipse.swt;bundle-version="[3.133.0,4.0.0)",
org.eclipse.jface;bundle-version="[3.39.0,4.0.0)"
Import-Package: com.ibm.icu.text
Bundle-RequiredExecutionEnvironment: JavaSE-21
Automatic-Module-Name: org.eclipse.jface.text
Bundle-Activator: org.eclipse.jface.text.Activator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@

package org.eclipse.jface.text;

import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.util.Locale;

import com.ibm.icu.text.BreakIterator;



/**
* Standard implementation of
Expand Down Expand Up @@ -223,9 +221,54 @@ protected IRegion findExtendedDoubleClickSelection(IDocument document, int offse
* @since 3.5
*/
protected IRegion findWord(IDocument document, int offset) {
	// Try the ASCII identifier lookup first; it yields null when the offset is
	// neither on nor directly behind a character from [A-Za-z0-9_].
	final IRegion asciiIdentifier= findIdentifierAt(document, offset);
	// Without an identifier hit, delegate to the break-iterator based overload.
	return asciiIdentifier != null
			? asciiIdentifier
			: findWord(document, offset, getWordBreakIterator());
}

/**
 * Looks for an ASCII identifier (characters from {@code [A-Za-z0-9_]}) at the
 * given offset and, if there is one, returns the longest contiguous run of
 * such characters within the line containing the offset.
 * <p>
 * The character at {@code offset} is examined first; if it is not an
 * identifier character (or the offset is at the line end), the character
 * directly before the offset is examined instead. When neither qualifies the
 * method returns {@code null} so that the caller can fall back to the
 * locale-aware {@link BreakIterator}.
 * </p>
 * <p>
 * This keeps identifier-style words with runs of {@code '_'} (e.g.
 * {@code foo__bar}, {@code __aaaa}) selected as a single word consistently
 * across JDK versions: {@link BreakIterator#getWordInstance()} places word
 * boundaries between consecutive underscores, whereas users expect such a
 * token to be one word.
 * </p>
 *
 * @param document the document to inspect
 * @param offset the offset to look at
 * @return the region covering the identifier, or {@code null} if there is no
 *         identifier at the offset or the document could not be accessed at
 *         that position
 */
private static IRegion findIdentifierAt(IDocument document, int offset) {
	try {
		final IRegion lineInfo= document.getLineInformationOfOffset(offset);
		final int first= lineInfo.getOffset();
		final int limit= first + lineInfo.getLength();

		// Pick the character the search grows from: the one under the offset
		// if it qualifies, otherwise the one immediately to its left.
		final int anchor;
		if (offset < limit && isIdentifierPart(document.getChar(offset))) {
			anchor= offset;
		} else if (offset > first && isIdentifierPart(document.getChar(offset - 1))) {
			anchor= offset - 1;
		} else {
			return null;
		}

		// Grow to the left, never crossing the start of the line.
		int from= anchor;
		for (; from > first; from--) {
			if (!isIdentifierPart(document.getChar(from - 1))) {
				break;
			}
		}

		// Grow to the right, never crossing the end of the line; 'to' is exclusive.
		int to= anchor + 1;
		for (; to < limit; to++) {
			if (!isIdentifierPart(document.getChar(to))) {
				break;
			}
		}

		return new Region(from, to - from);
	} catch (BadLocationException e) {
		// An inaccessible position is treated like "no identifier here".
		return null;
	}
}

/**
 * Tells whether the given character belongs to the ASCII identifier set
 * {@code [A-Za-z0-9_]}.
 *
 * @param c the character to classify
 * @return {@code true} for an ASCII letter, an ASCII digit or {@code '_'},
 *         {@code false} for every other character
 */
private static boolean isIdentifierPart(char c) {
	if (c == '_') {
		return true;
	}
	if (c >= 128) {
		// Anything outside the ASCII range is never accepted.
		return false;
	}
	final boolean digit= '0' <= c && c <= '9';
	final boolean upper= 'A' <= c && c <= 'Z';
	final boolean lower= 'a' <= c && c <= 'z';
	return digit || upper || lower;
}
Comment on lines +268 to +270
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this related to Character.isJavaIdentifierPart(char)? Would it make sense to reuse it here, even though the two are not identical at the moment?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's conceptually related (both answer "is this an identifier character?"), but Character.isJavaIdentifierPart should not be reused here. The check in this method must stay ASCII-only by design: it is the guard for a fast path that only fires for [A-Za-z0-9_]. For anything else, findIdentifierAt returns null so the caller falls back to BreakIterator.


/**
* Returns the locale specific word break iterator.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,113 @@ public void testClickAtLineEnd() throws Exception {
assertEquals("you", document.get(selection.getOffset(), selection.getLength()), "Unexpected selection");
}

/**
 * A click on the blank directly behind an identifier selects that identifier.
 */
@Test
public void testClickJustPastIdentifierSelectsThatIdentifier() throws Exception {
	IDocument document= new Document("foo bar baz");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();
	// Offset 3 is the space that immediately follows "foo".
	IRegion word= clickStrategy.findWord(document, 3);
	assertNotNull(word);
	String selectedText= document.get(word.getOffset(), word.getLength());
	assertEquals("foo", selectedText);
}

/**
 * A click on the leading underscore of an identifier selects the complete
 * identifier, including the run of underscores.
 */
@Test
public void testClickAtIdentifierStartSelectsWholeIdentifier() throws Exception {
	IDocument document= new Document("foo __aaaa bar");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();
	// Offset 4 is the first '_' of "__aaaa".
	IRegion word= clickStrategy.findWord(document, 4);
	assertNotNull(word);
	String selectedText= document.get(word.getOffset(), word.getLength());
	assertEquals("__aaaa", selectedText);
}

/**
 * Identifiers that touch the start or the end of their line are selected as a
 * whole and never extend across the line delimiter.
 */
@Test
public void testIdentifierAtLineStartAndEnd() throws Exception {
	IDocument document= new Document("_foo___\nbar_baz");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();
	// Line one: each of the offsets 0 through 7 must resolve to "_foo___".
	int offset= 0;
	while (offset <= 7) {
		IRegion word= clickStrategy.findWord(document, offset);
		assertNotNull(word, "no selection at offset " + offset);
		String selectedText= document.get(word.getOffset(), word.getLength());
		assertEquals("_foo___", selectedText, "unexpected selection at offset " + offset);
		offset++;
	}
	// Line two: offset 11 is the '_' inside "bar_baz".
	IRegion secondLineWord= clickStrategy.findWord(document, 11);
	assertNotNull(secondLineWord);
	assertEquals("bar_baz", document.get(secondLineWord.getOffset(), secondLineWord.getLength()));
}

/**
 * In a document consisting of a single word, both the very first offset and
 * the offset at the document end select that word.
 */
@Test
public void testSingleLineDocument() throws Exception {
	IDocument document= new Document("abc");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();

	// Click at the very beginning of the document.
	IRegion atStart= clickStrategy.findWord(document, 0);
	assertNotNull(atStart);
	assertEquals("abc", document.get(atStart.getOffset(), atStart.getLength()));

	// Click at the end of the document, directly behind the last character.
	IRegion atEnd= clickStrategy.findWord(document, document.getLength());
	assertNotNull(atEnd);
	assertEquals("abc", document.get(atEnd.getOffset(), atEnd.getLength()));
}

/**
 * Punctuation that encloses an identifier is not part of the selection.
 */
@Test
public void testIdentifierSurroundedByPunctuation() throws Exception {
	IDocument document= new Document("(foo_bar);");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();
	// Offset 4 is the '_' in the middle of "foo_bar".
	IRegion word= clickStrategy.findWord(document, 4);
	assertNotNull(word);
	String selectedText= document.get(word.getOffset(), word.getLength());
	assertEquals("foo_bar", selectedText);
}

/**
 * Japanese text without blanks: the test expects a Hiragana run ("こんにちは")
 * followed by a Kanji run ("世界") and that a double click selects only the
 * run it lands in, not the whole line. The expectation is meant to hold
 * independently of the locale.
 */
@Test
public void testCjkWordSelection() throws Exception {
	IDocument document= new Document("こんにちは世界");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();

	// Offset 2 lies inside the Hiragana run.
	IRegion hiraganaRun= clickStrategy.findWord(document, 2);
	assertNotNull(hiraganaRun);
	assertEquals("こんにちは", document.get(hiraganaRun.getOffset(), hiraganaRun.getLength()));

	// Offset 6 lies inside the Kanji run.
	IRegion kanjiRun= clickStrategy.findWord(document, 6);
	assertNotNull(kanjiRun);
	assertEquals("世界", document.get(kanjiRun.getOffset(), kanjiRun.getLength()));
}

/**
 * A CJK token delimited by blanks is selected without its ASCII neighbours.
 */
@Test
public void testCjkTokenBetweenSpaces() throws Exception {
	IDocument document= new Document("foo 我是 bar");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();
	// Offset 5 lies inside the CJK token.
	IRegion word= clickStrategy.findWord(document, 5);
	assertNotNull(word);
	String selectedText= document.get(word.getOffset(), word.getLength());
	assertEquals("我是", selectedText);
}

/**
 * A Thai token delimited by blanks is selected as a whole. Dictionary-based
 * segmentation of a contiguous Thai run only happens under a Thai locale, so
 * the token is surrounded by spaces to keep the test locale-independent.
 */
@Test
public void testThaiTokenBetweenSpaces() throws Exception {
	IDocument document= new Document("foo ไทย bar");
	TestSpecificDefaultTextDoubleClickStrategy clickStrategy= new TestSpecificDefaultTextDoubleClickStrategy();
	// Offset 5 lies inside the Thai token.
	IRegion word= clickStrategy.findWord(document, 5);
	assertNotNull(word);
	String selectedText= document.get(word.getOffset(), word.getLength());
	assertEquals("ไทย", selectedText);
}

private static final class TestSpecificDefaultTextDoubleClickStrategy extends DefaultTextDoubleClickStrategy {

@Override
Expand Down
Loading