Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | ALEF | 1575 |
public static final char | ALEF_HAMZA_ABOVE | 1571 |
public static final char | ALEF_HAMZA_BELOW | 1573 |
public static final char | ALEF_MADDA | 1570 |
public static final char | DAMMA | 1615 |
public static final char | DAMMATAN | 1612 |
public static final char | DOTLESS_YEH | 1609 |
public static final char | FATHA | 1614 |
public static final char | FATHATAN | 1611 |
public static final char | HEH | 1607 |
public static final char | KASRA | 1616 |
public static final char | KASRATAN | 1613 |
public static final char | SHADDA | 1617 |
public static final char | SUKUN | 1618 |
public static final char | TATWEEL | 1600 |
public static final char | TEH_MARBUTA | 1577 |
public static final char | YEH | 1610 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | ALEF | 1575 |
public static final char | BEH | 1576 |
public static final char | FEH | 1601 |
public static final char | HEH | 1607 |
public static final char | KAF | 1603 |
public static final char | LAM | 1604 |
public static final char | NOON | 1606 |
public static final char | TEH | 1578 |
public static final char | TEH_MARBUTA | 1577 |
public static final char | WAW | 1608 |
public static final char | YEH | 1610 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DOUBLE_TYPE | "<DOUBLE>" |
public static final int | HAN | 1 |
public static final int | HANGUL | 8 |
public static final int | HIRAGANA | 2 |
public static final int | KATAKANA | 4 |
public static final String | SINGLE_TYPE | "<SINGLE>" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | GRAM_TYPE | "gram" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_SUBWORD_SIZE | 15 |
public static final int | DEFAULT_MIN_SUBWORD_SIZE | 2 |
public static final int | DEFAULT_MIN_WORD_SIZE | 5 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final int | BLOCK_SIZE | 2048 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_BUFFER_SIZE | 256 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | FORMAT_SNOWBALL | "snowball" |
public static final String | FORMAT_WORDSET | "wordset" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | RULE_JAVA | "java" |
public static final String | RULE_UNICODE | "unicode" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "danish_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "german_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "spanish_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
public static final String | STOPWORDS_COMMENT | "#" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | FARSI_YEH | 1740 |
public static final char | HAMZA_ABOVE | 1620 |
public static final char | HEH | 1607 |
public static final char | HEH_GOAL | 1729 |
public static final char | HEH_YEH | 1728 |
public static final char | KAF | 1603 |
public static final char | KEHEH | 1705 |
public static final char | YEH | 1610 |
public static final char | YEH_BARREE | 1746 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "finnish_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "french_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "hungarian_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "italian_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_BUCKET_COUNT | 512 |
public static final int | DEFAULT_HASH_COUNT | 1 |
public static final int | DEFAULT_HASH_SET_SIZE | 1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_TOKEN_LENGTH | 2147483647 |
public static final int | DEFAULT_MAX_WORD_COUNT | 2147483647 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | FORCE_FIRST_LETTER | "forceFirstLetter" |
public static final String | KEEP | "keep" |
public static final String | KEEP_IGNORE_CASE | "keepIgnoreCase" |
public static final String | MAX_TOKEN_LENGTH | "maxTokenLength" |
public static final String | MAX_WORD_COUNT | "maxWordCount" |
public static final String | MIN_WORD_LENGTH | "minWordLength" |
public static final String | OK_PREFIX | "okPrefix" |
public static final String | ONLY_FIRST_WORD | "onlyFirstWord" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | MAX_KEY | "max" |
public static final String | MIN_KEY | "min" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_GRAPH_EXPANSIONS | 10000 |
public static final boolean | DEFAULT_PRESERVE_POSITION_INCREMENTS | true |
public static final boolean | DEFAULT_PRESERVE_SEP | true |
public static final int | SEP_LABEL | 31 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DATE_TYPE | "date" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DATE_PATTERN | "datePattern" |
public static final String | LOCALE | "locale" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | DEFAULT_DELIMITER | 124 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DELIMITER_ATTR | "delimiter" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_OUTPUT_TOKEN_SIZE | 1024 |
public static final char | DEFAULT_SEPARATOR | 32 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | MAX_OUTPUT_TOKEN_SIZE_KEY | "maxOutputTokenSize" |
public static final String | SEPARATOR_KEY | "separator" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PATTERN | "pattern" |
public static final String | PROTECTED_TOKENS | "protected" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | MAX_KEY | "max" |
public static final String | MIN_KEY | "min" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | CONSUME_ALL_TOKENS_KEY | "consumeAllTokens" |
public static final String | MAX_TOKEN_COUNT_KEY | "maxTokenCount" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | CONSUME_ALL_TOKENS_KEY | "consumeAllTokens" |
public static final String | MAX_START_OFFSET | "maxStartOffset" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | CONSUME_ALL_TOKENS_KEY | "consumeAllTokens" |
public static final String | MAX_TOKEN_POSITION_KEY | "maxTokenPosition" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | FILTER_ARG_SEPARATOR | 46 |
public static final char | FILTER_NAME_ID_SEPARATOR | 45 |
public static final String | PROTECTED_TERMS | "protected" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PREFIX_LENGTH_KEY | "prefixLength" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | ALPHA | 3 |
public static final int | ALPHANUM | 7 |
public static final int | CATENATE_ALL | 16 |
public static final int | CATENATE_NUMBERS | 8 |
public static final int | CATENATE_WORDS | 4 |
public static final int | DIGIT | 4 |
public static final int | GENERATE_NUMBER_PARTS | 2 |
public static final int | GENERATE_WORD_PARTS | 1 |
public static final int | IGNORE_KEYWORDS | 512 |
public static final int | LOWER | 1 |
public static final int | PRESERVE_ORIGINAL | 32 |
public static final int | SPLIT_ON_CASE_CHANGE | 64 |
public static final int | SPLIT_ON_NUMERICS | 128 |
public static final int | STEM_ENGLISH_POSSESSIVE | 256 |
public static final int | SUBWORD_DELIM | 8 |
public static final int | UPPER | 2 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PROTECTED_TOKENS | "protected" |
public static final String | TYPES | "types" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | CATENATE_ALL | 16 |
public static final int | CATENATE_NUMBERS | 8 |
public static final int | CATENATE_WORDS | 4 |
public static final int | GENERATE_NUMBER_PARTS | 2 |
public static final int | GENERATE_WORD_PARTS | 1 |
public static final int | IGNORE_KEYWORDS | 512 |
public static final int | PRESERVE_ORIGINAL | 32 |
public static final int | SPLIT_ON_CASE_CHANGE | 64 |
public static final int | SPLIT_ON_NUMERICS | 128 |
public static final int | STEM_ENGLISH_POSSESSIVE | 256 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PROTECTED_TOKENS | "protected" |
public static final String | TYPES | "types" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | ALPHA | 3 |
public static final int | ALPHANUM | 7 |
public static final int | DONE | -1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_GRAM_SIZE | 1 |
public static final int | DEFAULT_MIN_GRAM_SIZE | 1 |
public static final boolean | DEFAULT_PRESERVE_ORIGINAL | false |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_GRAM_SIZE | 1 |
public static final int | DEFAULT_MIN_GRAM_SIZE | 1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_NGRAM_SIZE | 2 |
public static final int | DEFAULT_MIN_NGRAM_SIZE | 1 |
public static final boolean | DEFAULT_PRESERVE_ORIGINAL | false |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_NGRAM_SIZE | 2 |
public static final int | DEFAULT_MIN_NGRAM_SIZE | 1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "dutch_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "norwegian_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | BOKMAAL | 1 |
public static final int | NYNORSK | 2 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | DEFAULT_DELIMITER | 47 |
public static final int | DEFAULT_SKIP | 0 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | DEFAULT_DELIMITER | 47 |
public static final int | DEFAULT_SKIP | 0 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | GROUP | "group" |
public static final String | PATTERN | "pattern" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PATTERN | "pattern" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PATTERN | "pattern" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | DEFAULT_DELIMITER | 124 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DELIMITER_ATTR | "delimiter" |
public static final String | ENCODER_ATTR | "encoder" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "portuguese_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final float | defaultMaxDocFreqPercent | 0.4000000059604645f |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | INFORMATION_SEPARATOR_MARKER | 31 |
public static final char | PUA_EC00_MARKER | 60416 |
public static final char | RTL_DIRECTION_MARKER | 8207 |
public static final char | START_OF_HEADING_MARKER | 1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "russian_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_FILLER_TOKEN | "_" |
public static final int | DEFAULT_MAX_SHINGLE_SIZE | 2 |
public static final int | DEFAULT_MIN_SHINGLE_SIZE | 2 |
public static final String | DEFAULT_TOKEN_SEPARATOR | " " |
public static final String | DEFAULT_TOKEN_TYPE | "shingle" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PROTECTED_TOKENS | "protected" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_TOKEN_LENGTH | 255 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | ACRONYM | 2 |
public static final int | ACRONYM_DEP | 8 |
public static final int | ALPHANUM | 0 |
public static final int | APOSTROPHE | 1 |
public static final int | CJ | 7 |
public static final int | COMPANY | 3 |
public static final int | EMAIL | 4 |
public static final int | HOST | 5 |
public static final int | NUM | 6 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_TOKEN_LENGTH | 255 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | ALPHANUM | 0 |
public static final int | EMAIL | 8 |
public static final int | HANGUL | 6 |
public static final int | HIRAGANA | 4 |
public static final int | IDEOGRAPHIC | 3 |
public static final int | KATAKANA | 5 |
public static final int | MAX_TOKEN_LENGTH_LIMIT | 1048576 |
public static final int | NUM | 1 |
public static final int | SOUTHEAST_ASIAN | 2 |
public static final int | URL | 7 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | AVOID_BAD_URL | 2 |
public static final int | EMAIL_TYPE | 8 |
public static final int | HANGUL_TYPE | 6 |
public static final int | HIRAGANA_TYPE | 4 |
public static final int | IDEOGRAPHIC_TYPE | 3 |
public static final int | KATAKANA_TYPE | 5 |
public static final int | NUMERIC_TYPE | 1 |
public static final int | SOUTH_EAST_ASIAN_TYPE | 2 |
public static final int | URL_TYPE | 7 |
public static final int | WORD_TYPE | 0 |
public static final int | YYEOF | -1 |
public static final int | YYINITIAL | 0 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "swedish_stop.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | TYPE_SYNONYM | "SYNONYM" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | TYPE_SYNONYM | "SYNONYM" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | WORD_SEPARATOR | 0 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DEFAULT_STOPWORD_FILE | "stopwords.txt" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | LUCENE_MATCH_VERSION_PARAM | "luceneMatchVersion" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_WORD_LEN | 255 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final int | BUFFERMAX | 1024 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | UNICODE_VERSION | "10.0.0.0" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | ACRONYM_ID | 2 |
public static final int | ALPHANUM_ID | 0 |
public static final int | APOSTROPHE_ID | 1 |
public static final String | BOLD | "b" |
public static final int | BOLD_ID | 12 |
public static final String | BOLD_ITALICS | "bi" |
public static final int | BOLD_ITALICS_ID | 14 |
public static final int | BOTH | 2 |
public static final String | CATEGORY | "c" |
public static final int | CATEGORY_ID | 11 |
public static final String | CITATION | "ci" |
public static final int | CITATION_ID | 10 |
public static final int | CJ_ID | 7 |
public static final int | COMPANY_ID | 3 |
public static final int | EMAIL_ID | 4 |
public static final String | EXTERNAL_LINK | "el" |
public static final int | EXTERNAL_LINK_ID | 9 |
public static final String | EXTERNAL_LINK_URL | "elu" |
public static final int | EXTERNAL_LINK_URL_ID | 17 |
public static final String | HEADING | "h" |
public static final int | HEADING_ID | 15 |
public static final int | HOST_ID | 5 |
public static final String | INTERNAL_LINK | "il" |
public static final int | INTERNAL_LINK_ID | 8 |
public static final String | ITALICS | "i" |
public static final int | ITALICS_ID | 13 |
public static final int | NUM_ID | 6 |
public static final String | SUB_HEADING | "sh" |
public static final int | SUB_HEADING_ID | 16 |
public static final int | TOKENS_ONLY | 0 |
public static final int | UNTOKENIZED_ONLY | 1 |
public static final int | UNTOKENIZED_TOKEN_FLAG | 1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | TOKEN_OUTPUT | "tokenOutput" |
public static final String | UNTOKENIZED_TYPES | "untokenizedTypes" |
Copyright © 2000-2018 Apache Software Foundation. All Rights Reserved.