本文整理汇总了Java中com.ibm.icu.text.UTF16类的典型用法代码示例。如果您正苦于以下问题:Java UTF16类的具体用法?Java UTF16怎么用?Java UTF16使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
UTF16类属于com.ibm.icu.text包,在下文中一共展示了UTF16类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getCodePointValue
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Looks up the value this trie stores for a code point.
 * When the offset lookup reports an error (negative offset), the trie's
 * initial value m_initialValue_ is returned instead.
 * @param ch code point to look up
 * @return the data value for ch, or m_initialValue_ if no valid offset
 *         could be determined
 */
public final char getCodePointValue(int ch)
{
    // Anything outside U+0000..U+D7FF (negative input, surrogates,
    // supplementary code points, out-of-range values) takes the general
    // lookup path.
    if (ch < 0 || ch >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
        int generalOffset = getCodePointOffset(ch);
        if (generalOffset < 0) {
            // a negative offset signals an error: fall back to the default
            return m_initialValue_;
        }
        return m_data_[generalOffset];
    }
    // Fast path for U+0000..U+D7FF; the original notes this is an inlined
    // copy of getRawOffset().
    int blockStart =
        m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
    return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:27,代码来源:CharTrie.java
示例2: getCodePointOffset
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Internal trie getter: maps a code point to the offset of its data.
 * The input range is split into four cases, tested in ascending order:
 * negative values, code points below the lead surrogates, the rest of
 * the BMP, and supplementary code points up to UCharacter.MAX_VALUE.
 * @param ch code point
 * @return offset to data, or -1 if ch is negative or greater than
 *         UCharacter.MAX_VALUE
 */
protected final int getCodePointOffset(int ch)
{
    // (the original notes that testing (ch >> 16) == 0 first was slower)
    if (ch < 0) {
        return -1;
    }
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // below U+D800 the plain getRawOffset() lookup works directly
        return getRawOffset(0, (char)ch);
    }
    if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
        // remaining BMP code points, including the surrogate range
        return getBMPOffset((char)ch);
    }
    if (ch <= UCharacter.MAX_VALUE) {
        // supplementary code point: look up through its lead surrogate
        // and the low bits selected by SURROGATE_MASK_
        char lead = UTF16.getLeadSurrogate(ch);
        char trailBits = (char)(ch & SURROGATE_MASK_);
        return getSurrogateOffset(lead, trailBits);
    }
    // beyond the last valid code point: report an error
    return -1;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:30,代码来源:Trie.java
示例3: currentCodePoint
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Returns the code point at the current position without (net) moving
 * the iterator.
 * If the current code unit is a lead surrogate immediately followed by a
 * trail surrogate, the combined supplementary code point is returned;
 * otherwise the value of current() is returned unchanged.
 * @return current code point
 */
@Override
public int currentCodePoint(){
    // charAt cannot be used here because it behaves differently when the
    // index points at a trail surrogate, so surrogates are checked by hand.
    int first = current();
    if (!UTF16.isLeadSurrogate((char)first)) {
        return first;
    }
    // Peek at the following code unit: step forward, read, step back so
    // the iterator index ends up where it started.
    next();
    int second = current();
    previous();
    if (!UTF16.isTrailSurrogate((char)second)) {
        // unpaired lead surrogate
        return first;
    }
    // well-formed surrogate pair
    return Character.toCodePoint((char)first, (char)second);
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:28,代码来源:ReplaceableUCharacterIterator.java
示例4: checkNullNextTrailIndex
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Checks whether iteration is at the start of an initial block, which is
 * detected by m_nextIndex_ being zero or negative.
 * Per the original description, all remaining code points in such a
 * block share the same value, so the iterator state is advanced past the
 * block in one step. This is used only for supplementary code points,
 * because the offset to the trail indexes can be 0.
 * @return true if an initial block was found and the iterator state was
 *         advanced; false otherwise
 * @throws NullPointerException if the trie has no DataManipulate object
 */
private final boolean checkNullNextTrailIndex()
{
    if (m_nextIndex_ > 0) {
        // not at the start of an initial block: nothing to do
        return false;
    }

    // jump to the last code point covered by the current lead surrogate
    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
    int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_);
    int leadBlock =
        m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_]
            << Trie.INDEX_STAGE_2_SHIFT_;

    if (m_trie_.m_dataManipulate_ == null) {
        throw new NullPointerException(
            "The field DataManipulate in this Trie is null");
    }

    // the lead surrogate's trie value yields the folding offset
    int leadDataIndex = leadBlock + (nextLead & Trie.INDEX_STAGE_3_MASK_);
    m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
        m_trie_.getValue(leadDataIndex));
    m_nextIndex_ --;
    m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
    return true;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:31,代码来源:TrieIterator.java
示例5: escape
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Escapes a string so that it contains only code points in the range
 * U+0020..U+007F.
 * Code points outside that range are written as Unicode escapes: a
 * backslash-u prefix with 4 hex digits for values up to U+FFFF, or a
 * backslash-U prefix with 8 hex digits for larger values. A backslash
 * in the input is doubled.
 * @param s the string to escape
 * @return the escaped string
 */
public static final String escape(String s) {
    StringBuilder out = new StringBuilder();
    int index = 0;
    while (index < s.length()) {
        int cp = Character.codePointAt(s, index);
        index += UTF16.getCharCount(cp);
        if (cp < ' ' || cp > 0x007F) {
            // outside the pass-through range: emit a Unicode escape
            if (cp <= 0xFFFF) {
                out.append("\\u");
                out.append(hex(cp, 4));
            } else {
                out.append("\\U");
                out.append(hex(cp, 8));
            }
        } else if (cp == '\\') {
            out.append("\\\\"); // two backslash characters
        } else {
            out.append((char)cp);
        }
    }
    return out.toString();
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:24,代码来源:Utility.java
示例6: hex
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Appends the hex form of each element of a string to an Appendable,
 * with a separator between consecutive elements.
 * The elements are code points when useCodePoints is true and UTF-16
 * code units otherwise. Each element is formatted by hex(value, width);
 * per the original documentation this gives uppercase hex digits, e.g.
 * "AB" with width 4 and separator "," yields "0041,0042".
 * @param s the text to convert
 * @param width passed through to hex(value, width) for each element
 * @param separator text appended between elements
 * @param useCodePoints true to iterate by code point, false by char
 * @param result the Appendable that receives the output
 * @return result, to allow chaining
 * @throws IllegalIcuArgumentException wrapping any IOException raised
 *         by result
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        if (!useCodePoints) {
            // one hex group per UTF-16 code unit
            for (int unitIndex = 0; unitIndex < s.length(); ++unitIndex) {
                if (unitIndex > 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(unitIndex),width));
            }
        } else {
            // one hex group per code point; advance by 1 or 2 code units
            int index = 0;
            while (index < s.length()) {
                int cp = Character.codePointAt(s, index);
                if (index > 0) {
                    result.append(separator);
                }
                result.append(hex(cp,width));
                index += UTF16.getCharCount(cp);
            }
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:30,代码来源:Utility.java
示例7: parseUnicodeIdentifier
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Parses a Unicode identifier from the given string, starting at the
 * given position.
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter. On INPUT, pos[0] is the index of
 * the first character to examine; it must be less than str.length() and
 * must not point to a whitespace character. On OUTPUT, pos[0] is the
 * position after the last parsed character. pos[0] is left untouched
 * when null is returned.
 * @return the Unicode identifier, or null if the code point at pos[0]
 * is not a valid identifier start
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    // precondition (not checked): pos[0] < str.length()
    StringBuilder identifier = new StringBuilder();
    int index = pos[0];
    while (index < str.length()) {
        int cp = Character.codePointAt(str, index);
        if (identifier.length() == 0) {
            // the first code point must be an identifier start
            if (!UCharacter.isUnicodeIdentifierStart(cp)) {
                return null;
            }
        } else if (!UCharacter.isUnicodeIdentifierPart(cp)) {
            // first code point that cannot continue the identifier
            break;
        }
        identifier.appendCodePoint(cp);
        index += UTF16.getCharCount(cp);
    }
    pos[0] = index;
    return identifier.toString();
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:38,代码来源:Utility.java
示例8: getCodePointValue
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Looks up the int value this trie stores for a code point.
 * When the offset lookup reports an error (negative offset), the trie's
 * initial value m_initialValue_ is returned instead.
 * @param ch code point to look up
 * @return the data value for ch, or m_initialValue_ if no valid offset
 *         could be determined
 */
public final int getCodePointValue(int ch)
{
    // Anything outside U+0000..U+D7FF (negative input, surrogates,
    // supplementary code points, out-of-range values) takes the general
    // lookup path.
    if (ch < 0 || ch >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
        int generalOffset = getCodePointOffset(ch);
        if (generalOffset < 0) {
            return m_initialValue_;
        }
        return m_data_[generalOffset];
    }
    // Fast path for U+0000..U+D7FF; the original notes this is an inlined
    // copy of getRawOffset().
    int blockStart =
        m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
    return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:24,代码来源:IntTrie.java
示例9: getSurrogateValue
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Gets the value associated with a pair of surrogates.
 * @param lead a lead surrogate
 * @param trail a trail surrogate
 * @return the data value at the pair's offset, or m_initialValue_ when
 *         getSurrogateOffset() does not return a positive offset
 * @throws IllegalArgumentException if lead is not a lead surrogate or
 *         trail is not a trail surrogate
 */
public final int getSurrogateValue(char lead, char trail)
{
    boolean wellFormedPair =
        UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail);
    if (!wellFormedPair) {
        throw new IllegalArgumentException(
            "Argument characters do not form a supplementary character");
    }
    // locate the data through the folded lead/trail units; a
    // non-positive offset is treated as "no data" and yields the default
    int offset = getSurrogateOffset(lead, trail);
    return (offset > 0) ? m_data_[offset] : m_initialValue_;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:23,代码来源:IntTrie.java
示例10: getType
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
* Gets the character extended type
* @param ch character to be tested
* @return extended type it is associated with
*/
private static int getType(int ch)
{
if (UCharacterUtility.isNonCharacter(ch)) {
// not a character we return a invalid category count
return NON_CHARACTER_;
}
int result = UCharacter.getType(ch);
if (result == UCharacterCategory.SURROGATE) {
if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
result = LEAD_SURROGATE_;
}
else {
result = TRAIL_SURROGATE_;
}
}
return result;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:23,代码来源:UCharacterName.java
示例11: nextTrail32
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Completes a 32-bit code point whose first code unit has already been
 * obtained from the iterator.
 * If lead is not above the lead-surrogate maximum, the next code unit is
 * read; when it is a trail surrogate the two are combined into a
 * supplementary code point, otherwise the iterator is stepped back.
 * NOTE(review): there is no lower-bound check on lead, so callers
 * presumably only pass values at or above the lead-surrogate minimum.
 * @param ci the iterator that lead was read from
 * @param lead the code unit already read
 * @return DONE32 if lead is CharacterIterator.DONE and the iterator is
 *         at or past its end index; the combined supplementary code
 *         point for a surrogate pair; otherwise lead unchanged
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
    boolean atEnd = lead == CharacterIterator.DONE
        && ci.getIndex() >= ci.getEndIndex();
    if (atEnd) {
        return DONE32;
    }
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        // cannot start a surrogate pair
        return lead;
    }
    char trail = ci.next();
    if (!UTF16.isTrailSurrogate(trail)) {
        // not a pair: undo the read
        ci.previous();
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
        + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
        + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:18,代码来源:CharacterIteration.java
示例12: previous32
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Moves the iterator back by one code point and returns that code point.
 * A trail surrogate preceded by a lead surrogate is consumed as a pair
 * and returned as one supplementary code point. An unpaired trail
 * surrogate is returned as is, and the iterator moves back by only one
 * code unit.
 * @param ci the iterator to move
 * @return DONE32 if the iterator is already at or before its begin
 *         index, otherwise the code point that was stepped over
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        return DONE32;
    }
    char trail = ci.previous();
    if (!UTF16.isTrailSurrogate(trail) || ci.getIndex() <= ci.getBeginIndex()) {
        // ordinary code unit, or a trail surrogate with nothing before it
        return trail;
    }
    char lead = ci.previous();
    if (!UTF16.isLeadSurrogate(lead)) {
        // unpaired trail surrogate: undo the extra step back
        ci.next();
        return trail;
    }
    return (((int)lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
        + ((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
        + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:19,代码来源:CharacterIteration.java
示例13: current32
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Returns the 32-bit code point at the iterator's current position.
 * When the current code unit is a lead surrogate, the following unit is
 * peeked at with next() and the position is restored with previous().
 * @param ci the iterator to read from
 * @return the supplementary code point for a surrogate pair; DONE32 if
 *         the current char is CharacterIterator.DONE and the iterator is
 *         at or past its end index; otherwise the current code unit
 */
public static int current32(CharacterIterator ci) {
    char lead = ci.current();
    if (lead < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // fast exit: below the surrogate range
        return lead;
    }
    if (UTF16.isLeadSurrogate(lead)) {
        // peek at the next unit, then restore the position
        int trail = (int)ci.next();
        ci.previous();
        if (!UTF16.isTrailSurrogate((char)trail)) {
            return lead;
        }
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }
    // DONE is also a legal char value, so only treat it as end-of-text
    // when the index confirms the iterator is exhausted
    if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
    }
    return lead;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:24,代码来源:CharacterIteration.java
示例14: calcStatus
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Classifies the text between two break positions.
 * The span is scanned code point by code point; the first digit or
 * letter found decides the result.
 * @param current break offset where the span begins, relative to start
 * @param next break offset where the span ends, relative to start
 * @return WORD_NUMBER if a digit is found first, WORD_LETTER if a letter
 *         is found first, WORD_NONE if the span holds neither or if
 *         either offset is BreakIterator.DONE
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor i. Reading at begin on every pass (as
        // the code did before) only ever classified the first code point
        // of the span, so a span like "-1" was never seen as a number.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
开发者ID:jprante,项目名称:elasticsearch-icu,代码行数:18,代码来源:BreakIteratorWrapper.java
示例15: calcStatus
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Classifies the text between two break positions.
 * The span is scanned code point by code point; the first digit or
 * letter found decides the result.
 * @param current break offset where the span begins, relative to start
 * @param next break offset where the span ends, relative to start
 * @return WORD_NUMBER if a digit is found first, WORD_LETTER if a letter
 *         is found first, WORD_NONE if the span holds neither or if
 *         either offset is BreakIterator.DONE
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor i. Reading at begin on every pass (as
        // the code did before) only ever classified the first code point
        // of the span, so later digits and letters were never examined.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            // TODO: try to separately specify ideographic, kana?
            // [currently all bundled as letter for this case]
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:BreakIteratorWrapper.java
示例16: getNextDelimiter
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Finds the index of the first delimiter at or after offset.
 * Delimiters are tested against the delims lookup table when it is
 * available and against the m_delimiters_ set otherwise.
 * @param offset index into the source string where the search starts
 * @return index of the next delimiter, or (- source string length - 1)
 *         if offset is negative or no delimiter is found
 */
private int getNextDelimiter(int offset)
{
    final int notFound = -1 - m_length_;
    if (offset < 0) {
        return notFound;
    }
    int index = offset;
    if (delims != null) {
        // table lookup; code points beyond the table are never delimiters
        do {
            int cp = UTF16.charAt(m_source_, index);
            if (cp < delims.length && delims[cp]) {
                break;
            }
            index ++;
        } while (index < m_length_);
    } else {
        // no table available: ask the delimiter set directly
        do {
            int cp = UTF16.charAt(m_source_, index);
            if (m_delimiters_.contains(cp)) {
                break;
            }
            index ++;
        } while (index < m_length_);
    }
    return (index < m_length_) ? index : notFound;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:36,代码来源:StringTokenizer.java
示例17: getNextNonDelimiter
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Finds the index of the first non-delimiter at or after offset.
 * Delimiters are tested against the delims lookup table when it is
 * available and against the m_delimiters_ set otherwise.
 * @param offset index into the source string where the search starts
 * @return index of the next non-delimiter, or
 *         (- source string length - 1) if offset is negative or only
 *         delimiters remain
 */
private int getNextNonDelimiter(int offset)
{
    final int notFound = -1 - m_length_;
    if (offset < 0) {
        return notFound;
    }
    int index = offset;
    if (delims != null) {
        // table lookup; code points beyond the table are never delimiters
        do {
            int cp = UTF16.charAt(m_source_, index);
            if (!(cp < delims.length && delims[cp])) {
                break;
            }
            index ++;
        } while (index < m_length_);
    } else {
        // no table available: ask the delimiter set directly
        do {
            int cp = UTF16.charAt(m_source_, index);
            if (!m_delimiters_.contains(cp)) {
                break;
            }
            index ++;
        } while (index < m_length_);
    }
    return (index < m_length_) ? index : notFound;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:36,代码来源:StringTokenizer.java
示例18: nextForCodePoint
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Traverses the trie from the current state for the
 * one or two UTF-16 code units that encode the input code point.
 * A supplementary code point is fed as lead then trail surrogate; the
 * trail is only consumed if the result for the lead reports hasNext().
 * @param cp A Unicode code point 0..0x10ffff.
 * @return The match/value Result.
 * @stable ICU 4.8
 */
public Result nextForCodePoint(int cp) {
    if (cp <= 0xffff) {
        // single code unit
        return next(cp);
    }
    if (next(UTF16.getLeadSurrogate(cp)).hasNext()) {
        return next(UTF16.getTrailSurrogate(cp));
    }
    // the lead surrogate already ended the traversal
    return Result.NO_MATCH;
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:15,代码来源:CharsTrie.java
示例19: getBMPOffset
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Gets the offset to the data that a BMP character points to.
 * A lead surrogate is treated as a normal code point: it is looked up
 * starting from LEAD_INDEX_OFFSET_, every other character from 0.
 * @param ch BMP character
 * @return offset to data
 */
protected final int getBMPOffset(char ch)
{
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
        || ch > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        // not a lead surrogate
        return getRawOffset(0, ch);
    }
    return getRawOffset(LEAD_INDEX_OFFSET_, ch);
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:15,代码来源:Trie.java
示例20: getDecomposition
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Gets the decomposition for one code point.
 * An algorithmic mapping is followed by looping again on the mapped
 * code point; if that code point does not decompose further, the mapped
 * code point itself is returned as the decomposition.
 * @param c code point
 * @return c's decomposition, if it has one; returns null if it does not have a decomposition
 */
public String getDecomposition(int c) {
    // last algorithmically mapped code point, or -1 if none so far
    int mapped = -1;
    while (true) {
        if (c >= minDecompNoCP) {
            int norm16 = getNorm16(c);
            if (!isDecompYes(norm16)) {
                if (isHangul(norm16)) {
                    // Hangul syllable: decompose algorithmically
                    StringBuilder jamos = new StringBuilder();
                    Hangul.decompose(c, jamos);
                    return jamos.toString();
                }
                if (isDecompNoAlgorithmic(norm16)) {
                    // remember the mapping and re-examine the result
                    mapped = c = mapAlgorithmic(c, norm16);
                    continue;
                }
                // c decomposes: the mapping lives in the variable-length
                // extra data, as a length unit followed by the text
                int length = extraData.charAt(norm16) & MAPPING_LENGTH_MASK;
                int textStart = norm16 + 1;
                return extraData.substring(textStart, textStart + length);
            }
        }
        // c itself does not decompose
        if (mapped < 0) {
            return null;
        }
        return UTF16.valueOf(mapped);
    }
}
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:32,代码来源:Normalizer2Impl.java
注:本文中的com.ibm.icu.text.UTF16类示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论