MediaTypeRegistry registry
List<E> detectors
MediaType type
MagicDetector.detect(InputStream, Metadata) method if a match is found.
int length
byte[] pattern
MagicDetector.type is returned.
int patternLength
boolean isRegex
boolean isStringIgnoreCase
byte[] mask
int offsetRangeBegin
int offsetRangeEnd
first offset. Note that this is not the offset of the last byte read from the document stream. Instead, the last window of bytes to be compared starts at this offset.
Charset charset
int bytesToTest
Set<E> supportedEmbedTypes
Map<K,V> metadataCommandArguments
String[] command
Runtime.exec(String[])
String commandAssignmentOperator
String commandAssignmentDelimeter
String commandAppendOperator
boolean quoteAssignmentValues
TemporaryResources tmp
ExtractReaderException.TYPE type
ParseContext context
EmbeddedDocumentExtractor embeddedDocumentExtractor
TikaConfig tikaConfig
MimeTypes mimeTypes
Detector detector
ClassLoader loader
Parser parser
Path tikaBin
ParserFactoryFactory parserFactoryFactory
List<E> java
int poolSize
int currentlyInUse
Queue<E> pool
long serverPulseMillis
long serverParseTimeoutMillis
long serverWaitTimeoutMillis
int maxFilesProcessedPerClient
ParseContext context
Parser parser
org.apache.tika.gui.TikaGUI.ImageSavingParser imageParser
CardLayout layout
JPanel cards
JEditorPane html
JEditorPane text
JEditorPane textMain
JEditorPane xml
JEditorPane json
JEditorPane metadata
JFileChooser chooser
Object tag
String string
int slash
MediaType.string.
int semicolon
MediaType.string from possible parameters. Length of MediaType.string in case there are no parameters.
Map<K,V> parameters
Map<K,V> registry
Map<K,V> inheritance
MediaType type
String acronym
String uti
List<E> links
String description
List<E> magics
List<E> rootXML
int minLength
List<E> extensions
boolean isInterpreted
MimeType rootMimeType
List<E> rootMimeTypeL
MimeType textMimeType
MimeType htmlMimeType
MimeType xmlMimeType
MediaTypeRegistry registry
Map<K,V> types
org.apache.tika.mime.Patterns patterns
List<E> magics
List<E> xmls
MimeTypes mimeTypes
MediaType rootMediaType
float priorMagicFileType
float priorExtensionFileType
float priorMetaFileType
float magic_trust
float extension_trust
float meta_trust
float magic_neg
float extension_neg
float meta_neg
float threshold
float changeRate
EncodingDetector encodingDetector
Detector detector
MediaTypeRegistry registry
List<E> parsers
Parser fallback
DigestingParser.Digester digester
Parser parser
ContentHandlerFactory contentHandlerFactory
RecursiveParserWrapperHandler
boolean catchEmbeddedExceptions
int maxEmbeddedResources
org.apache.tika.parser.RecursiveParserWrapper.ParserState lastParseState
byte[] signature
int version
int header_len
int unknown_000c
long last_modified
long lang_id
byte[] dir_uuid
byte[] stream_uuid
long unknown_offset
long unknown_len
long dir_offset
long dir_len
long data_offset
int dataRemained
int currentPlace
byte[] signature
int version
int header_len
int unknown_000c
long block_len
int blockidx_intvl
int index_depth
int index_root
int index_head
int unknown_0024
long num_blocks
int unknown_002c
long lang_id
byte[] system_uuid
byte[] unknown_0044
int dataRemained
int currentPlace
long size
byte[] signature
long version
long resetInterval
long windowSize
long windowsPerReset
long unknown_18
int dataRemained
int currentPlace
long version
long block_count
long unknown
long table_offset
long uncompressed_len
long compressed_len
long block_len
long[] block_address
int dataRemained
int currentPlace
byte[] signature
long free_space
int dataRemained
int currentPlace
byte[] signature
long free_space
long unknown_0008
int block_prev
int block_next
int dataRemained
int currentPlace
String aeDescriptorPath
String UMLSUser
String UMLSPass
boolean prettyPrint
CTAKESSerializer serializerType
OutputStream stream
boolean serialize
boolean text
String[] metadata
CTAKESAnnotationProperty[] annotationProps
char separatorChar
Set<E> supportedTypes
Map<K,V> metadataPatterns
String[] command
Runtime.exec(String[])
ExternalParser.LineConsumer ignoredLineConsumer
String command
GeoParserConfig config
GeoGazetteerClient gazetteerClient
boolean initialized
URL modelUrl
opennlp.tools.namefind.NameFinderME nameFinder
boolean available
int markLimit
boolean extractScripts
int markLimit
int FMT_ANPA_1312
int FMT_ANPA_UPI
int FMT_ANPA_UPI_DL
int FMT_IPTC_7901
int FMT_IPTC_PHOTO
int FMT_IPTC_CHAR
int FMT_NITF
int FMT_NITF_TT
int FMT_NITF_RB
int FMT_IPTC_AP
int FMT_IPTC_BLM
int FMT_IPTC_NYT
int FMT_IPTC_RTR
int FORMAT
Detector detector
boolean extractAllAlternatives
OfficeParserConfig defaultOfficeParserConfig
Locale locale
boolean extractMacros
boolean includeDeletedContent
boolean includeMoveFromContent
boolean includeShapeBasedContent
boolean includeHeadersAndFooters
boolean includeMissingRows
boolean includeSlideNotes
boolean includeSlideMasterContent
boolean concatenatePhoneticRuns
boolean useSAXDocxExtractor
boolean useSAXPptxExtractor
boolean extractAllAlternativesFromMSG
DecimalFormatSymbols decimalSymbols
DecimalFormat integerFormat
DecimalFormat decimalFormat
DecimalFormat scientificFormat
String tesseractPath
String tessdataPath
String language
String pageSegMode
long minFileSizeToOcr
long maxFileSizeToOcr
int timeout
TesseractOCRConfig.OUTPUT_TYPE outputType
int enableImageProcessing
String imageMagickPath
int density
int depth
String colorspace
String filter
int resize
String pageSeparator
boolean preserveInterwordSpacing
boolean applyRotation
Map<K,V> otherTesseractConfig
TesseractOCRConfig defaultConfig
boolean needToCheck
boolean allowAccessibility
PDFParserConfig defaultConfig
InitializableProblemHandler initializableProblemHandler
boolean enableAutoSpace
boolean suppressDuplicateOverlappingText
boolean extractAnnotationText
boolean sortByPosition
boolean extractAcroFormContent
boolean extractBookmarksText
boolean extractInlineImages
boolean extractUniqueInlineImagesOnly
Float averageCharTolerance
Float spacingTolerance
boolean ifXFAExtractOnlyXFA
PDFParserConfig.OCR_STRATEGY ocrStrategy
int ocrDPI
org.apache.pdfbox.rendering.ImageType ocrImageType
String ocrImageFormatName
float ocrImageQuality
float ocrImageScale
AccessChecker accessChecker
boolean catchIntermediateIOExceptions
boolean extractActions
long maxMainMemoryBytes
boolean setKCMS
boolean detectAngles
int memoryLimitInKb
AgeRecogniserConfig config
Tika secondaryParser
ObjectRecogniser recogniser
int memoryLimitInKb
opennlp.tools.sentiment.SentimentME classifier
String modelPath
The path could be one of the following:
String filePath
boolean mimetype
int minSize
byte[] output
byte[] input
int tmpPos
int outPos
int inSize
int inPos
XHTMLContentHandler xhtml
String stringsPath
int minLength
StringsEncoding encoding
int timeout
boolean stripMarkup
int markLimit
int markLimit
boolean includeDeletedContent
ContentHandlerFactory contentHandlerFactory
int maxEmbeddedResources
int embeddedResources
BasicContentHandlerFactory.HANDLER_TYPE type
int writeLimit
Object tag
private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException
ClassNotFoundException
IOException
private void writeObject(ObjectOutputStream ois) throws IOException
IOException
com.adobe.xmp.XMPMeta xmpData
Copyright © 2007–2018 The Apache Software Foundation. All rights reserved.