public class Lucene extends Object
Modifier and Type | Field and Description |
---|---|
static org.apache.lucene.analysis.Analyzer |
AGGRESSIVE_TRANSFORM |
static org.apache.lucene.analysis.Analyzer |
ENGLISH |
static org.apache.lucene.document.FieldType |
FULL_INDEX
use this when you want to store term vectors etc.
|
static org.apache.lucene.document.FieldType |
JUST_INDEX |
static org.apache.lucene.analysis.Analyzer |
KEYWORD |
static org.apache.lucene.analysis.Analyzer |
MINIMAL |
static org.apache.lucene.analysis.Analyzer |
SIMPLE |
static org.apache.lucene.analysis.Analyzer |
STANDARD |
static org.apache.lucene.util.Version |
version |
static org.apache.lucene.analysis.Analyzer |
WHITESPACE |
Constructor and Description |
---|
Lucene() |
Modifier and Type | Method and Description |
---|---|
static double |
getIdf(org.apache.lucene.index.IndexReader reader,
String textField,
String term)
returns idf for a given term
|
static Map<String,Float> |
getIdfs(org.apache.lucene.index.IndexReader reader,
String field)
uses defaultSimilarity to compute idf.
|
static Map<String,Float> |
getIdfs(org.apache.lucene.index.IndexReader reader,
String field,
org.apache.lucene.search.similarities.TFIDFSimilarity tfidfSIM)
uses a custom similarity to compute idf; use this if you want to supply your own
IDF(numDocs,docFreq) implementation
|
static int |
getLuceneDocId(org.apache.lucene.index.IndexReader reader,
String docIdField,
String docId)
returns the Lucene document id of the document whose docIdField value matches the given docId.
|
static double |
getTf(org.apache.lucene.index.IndexReader reader,
int luceneDocId,
String textField,
String term)
returns tf for a given term in a given doc
|
static double |
getTf(org.apache.lucene.index.IndexReader reader,
String docIdField,
String docId,
String textField,
String term)
returns tf for a given term in a given doc
|
static double |
getTfIdf(org.apache.lucene.index.IndexReader reader,
int luceneDocId,
String textField,
String term)
returns tf-idf = (term_freq/inverse_doc_freq) for a given doc and a term.
|
static double |
getTfIdf(org.apache.lucene.index.IndexReader reader,
String docIdField,
String docId,
String textField,
String term)
returns tf-idf = (term_freq/inverse_doc_freq) for a given doc and a term.
|
static Map<String,Float> |
getTfs(org.apache.lucene.index.IndexReader reader,
String field,
int docID)
returns term freq for a given doc.
|
static boolean |
indexExists(String pageIndex) |
static boolean |
isDeleted(org.apache.lucene.index.IndexReader indexReader,
int docID)
checks if docId is deleted in the index
|
static org.apache.lucene.index.IndexWriterConfig |
newConfig(org.apache.lucene.analysis.Analyzer analyzer)
creates a config for a writer using a specified analyzer
|
static org.apache.lucene.search.Query |
newQuery(String field,
String query)
Uses default query parser
|
static org.apache.lucene.queryparser.classic.QueryParser |
newQueryParser(String string)
Uses default analyzer
|
static org.apache.lucene.queryparser.classic.QueryParser |
newQueryParser(String field,
org.apache.lucene.analysis.Analyzer analyzer) |
static org.apache.lucene.index.IndexReader |
ramReader(String pathToIndex) |
static org.apache.lucene.index.IndexReader |
reader(String pathToIndex) |
static org.apache.lucene.index.IndexReader |
reader(String dir,
String... children) |
static org.apache.lucene.index.TermsEnum |
safeEnum(org.apache.lucene.index.Terms terms) |
static org.apache.lucene.search.IndexSearcher |
searcher(org.apache.lucene.store.Directory dir) |
static org.apache.lucene.search.IndexSearcher |
searcher(org.apache.lucene.index.IndexReader reader) |
static org.apache.lucene.search.IndexSearcher |
searcher(String path) |
static org.apache.lucene.search.IndexSearcher |
searcher(String dir,
String... children) |
static org.apache.lucene.index.IndexWriter |
simpleStemmingWriter(String indexDir) |
static org.apache.lucene.index.IndexWriter |
simpleWriter(String pathToIndexDir)
returns an index writer configured to use a simple analyzer
|
static org.apache.lucene.index.IndexWriter |
storeOnlyWriter(String pathToIndexDir) |
static Iterable<org.apache.lucene.index.TermsEnum> |
terms(org.apache.lucene.index.Terms terms) |
static org.apache.lucene.index.IndexWriter |
writer(String pathToIndexDir,
org.apache.lucene.index.IndexWriterConfig config)
returns an index writer with the specified writer config.
|
public static org.apache.lucene.document.FieldType FULL_INDEX
public static org.apache.lucene.document.FieldType JUST_INDEX
public static final org.apache.lucene.util.Version version
public static final org.apache.lucene.analysis.Analyzer ENGLISH
public static final org.apache.lucene.analysis.Analyzer STANDARD
public static final org.apache.lucene.analysis.Analyzer SIMPLE
public static final org.apache.lucene.analysis.Analyzer KEYWORD
public static final org.apache.lucene.analysis.Analyzer WHITESPACE
public static final org.apache.lucene.analysis.Analyzer AGGRESSIVE_TRANSFORM
public static final org.apache.lucene.analysis.Analyzer MINIMAL
public static org.apache.lucene.index.IndexWriterConfig newConfig(org.apache.lucene.analysis.Analyzer analyzer)
analyzer
- public static org.apache.lucene.index.IndexWriter writer(String pathToIndexDir, org.apache.lucene.index.IndexWriterConfig config) throws IOException
pathToIndexDir
- config
- IOException
public static org.apache.lucene.index.IndexWriter simpleWriter(String pathToIndexDir) throws IOException
pathToIndexDir
- IOException
public static org.apache.lucene.index.IndexWriter simpleStemmingWriter(String indexDir) throws IOException
IOException
public static boolean isDeleted(org.apache.lucene.index.IndexReader indexReader, int docID)
indexReader
- docID
- public static org.apache.lucene.index.IndexWriter storeOnlyWriter(String pathToIndexDir) throws IOException
IOException
public static org.apache.lucene.index.IndexReader ramReader(String pathToIndex) throws IOException
IOException
public static org.apache.lucene.index.IndexReader reader(String dir, String... children) throws IOException
IOException
public static org.apache.lucene.index.IndexReader reader(String pathToIndex) throws IOException
IOException
public static org.apache.lucene.search.IndexSearcher searcher(org.apache.lucene.index.IndexReader reader) throws IOException
IOException
public static org.apache.lucene.search.IndexSearcher searcher(org.apache.lucene.store.Directory dir) throws IOException
IOException
public static org.apache.lucene.search.IndexSearcher searcher(String dir, String... children) throws IOException
IOException
public static org.apache.lucene.search.IndexSearcher searcher(String path) throws IOException
IOException
public static Iterable<org.apache.lucene.index.TermsEnum> terms(org.apache.lucene.index.Terms terms)
terms
- public static org.apache.lucene.index.TermsEnum safeEnum(org.apache.lucene.index.Terms terms)
terms
- public static org.apache.lucene.queryparser.classic.QueryParser newQueryParser(String string)
string
- public static org.apache.lucene.search.Query newQuery(String field, String query) throws org.apache.lucene.queryparser.classic.ParseException
field
- query
- org.apache.lucene.queryparser.classic.ParseException
public static boolean indexExists(String pageIndex)
public static org.apache.lucene.queryparser.classic.QueryParser newQueryParser(String field, org.apache.lucene.analysis.Analyzer analyzer)
public static Map<String,Float> getIdfs(org.apache.lucene.index.IndexReader reader, String field) throws IOException
reader
- field
- IOException
public static Map<String,Float> getIdfs(org.apache.lucene.index.IndexReader reader, String field, org.apache.lucene.search.similarities.TFIDFSimilarity tfidfSIM) throws IOException
reader
- field
- tfidfSIM
- IOException
public static Map<String,Float> getTfs(org.apache.lucene.index.IndexReader reader, String field, int docID) throws IOException
reader
- field
- IOException
public static int getLuceneDocId(org.apache.lucene.index.IndexReader reader, String docIdField, String docId) throws IOException
reader
- docIdField
- docId
- IOException
public static double getTfIdf(org.apache.lucene.index.IndexReader reader, String docIdField, String docId, String textField, String term) throws IOException
reader
- docIdField
- docId
- textField
- term
- IOException
public static double getTfIdf(org.apache.lucene.index.IndexReader reader, int luceneDocId, String textField, String term) throws IOException
reader
- luceneDocId
- textField
- term
- IOException
public static double getTf(org.apache.lucene.index.IndexReader reader, int luceneDocId, String textField, String term) throws IOException
reader
- luceneDocId
- textField
- term
- IOException
public static double getTf(org.apache.lucene.index.IndexReader reader, String docIdField, String docId, String textField, String term) throws IOException
reader
- docIdField
- docId
- textField
- term
- IOException
public static double getIdf(org.apache.lucene.index.IndexReader reader, String textField, String term) throws IOException
reader
- textField
- term
- IOException
Copyright © 2017. All rights reserved.