|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  edu.illinois.cs.cogcomp.lbj.coref.ir.docs.DocBase
public abstract class DocBase
Represents one document from a corpus, including the text, annotations of coreference, relations, entities, and other relevant information. Also contains methods to load input from XML files.
Nested Class Summary | |
---|---|
static class |
DocBase.PosSource
|
Field Summary | |
---|---|
(package private) static int |
goodEnds
|
(package private) static int |
goodStarts
|
protected java.lang.String |
m_annotationAuthor
|
protected java.lang.String |
m_baseFN
|
private java.util.Map<Mention,Mention> |
m_bestMentionMap
|
protected boolean |
m_bNeedsCasing
|
private boolean |
m_bUsePredEntities
|
private boolean |
m_bUsePredMentions
|
protected LBJ2.classify.Classifier |
m_caser
|
private java.util.Map<Pair<Mention,Mention>,CExample> |
m_cExMap
|
private java.util.Map<java.lang.Integer,java.lang.Integer> |
m_charWordMap
|
private ChainSolution<Mention> |
m_corefChains
|
private java.util.Map<java.lang.String,java.lang.Integer> |
m_corpusWordCounts
|
private java.lang.String |
m_countingText
|
protected java.lang.String |
m_dateTime
|
private Aligner<Mention> |
m_defaultAligner
|
protected java.lang.String |
m_docID
|
protected java.lang.String |
m_docType
|
private java.util.Map<java.lang.String,java.lang.Integer> |
m_docWordCounts
|
protected java.lang.String |
m_encoding
|
private java.util.Map<java.lang.Integer,java.util.Set<Mention>> |
m_extentStartWordNumMentionMap
|
private java.util.Map<Mention,GExample> |
m_gExMap
|
protected java.lang.String |
m_headline
|
private java.util.Map<java.lang.Integer,java.util.Map<java.lang.Integer,java.lang.Boolean>> |
m_headPredictionMap
|
private java.util.Map<java.lang.Integer,java.util.Set<Mention>> |
m_headStartWordNumMentionMap
|
private java.lang.Boolean |
m_isCaseSensitive
|
private java.util.Map<Mention,java.util.Set<Mention>> |
m_mentionsContaining
|
private java.util.List<java.util.List<Mention>> |
m_mentsInSents
|
private java.util.Map<java.lang.String,java.lang.Integer> |
m_mentWordCounts
|
private int |
m_nSents
|
private java.util.List<java.util.List<java.lang.String>> |
m_phrases
|
private java.util.List<java.lang.String> |
m_pos
|
private java.util.List<Entity> |
m_predEntities
|
private java.util.List<Mention> |
m_predMentions
|
private java.util.Map<Mention,Mention> |
m_predToTrueMention
|
private java.util.List<java.lang.Integer> |
m_quoteNestLevel
|
private java.util.List<Relation> |
m_relations
|
private java.util.Map<Pair<java.lang.Integer,java.lang.Integer>,Pair<java.util.List<Mention>,java.util.List<Mention>>> |
m_sentenceMentionsPair
|
protected java.lang.String |
m_slug
|
protected java.lang.String |
m_source
|
protected java.lang.String |
m_text
|
private int |
m_textStartCharNum
|
private java.util.List<Entity> |
m_trueEntities
|
private java.util.List<Mention> |
m_trueMentions
|
private boolean |
m_trueMentionsSorted
|
protected java.lang.String |
m_version
|
private java.util.Map<java.lang.Integer,java.lang.Integer> |
m_wordNumCharNumMap
|
private java.util.Map<java.lang.Integer,java.lang.Integer> |
m_wordNumSentNumMap
|
private java.util.List<java.lang.String> |
m_words
|
(package private) static int |
medEnds
|
private static long |
serialVersionUID
|
(package private) static int |
totalMentions
|
Constructor Summary | |
---|---|
DocBase()
Basic constructor: Not recommended. |
Method Summary | |
---|---|
void |
addHeadPrediction(int firstWN,
int lastWN,
boolean pred)
|
protected void |
addPredEntities(java.util.List<Entity> ents)
Backed internally. |
protected void |
addRelation(Relation r)
|
protected void |
addTrueEntity(Entity e)
Can be made public, but then need to ensure that e's mentions are all added. |
protected void |
addTrueMention(Mention m)
|
protected void |
alignPredMentsToTrue()
|
protected void |
buildMentionsContaining()
|
protected void |
buildMentionsInSents()
|
void |
calcAndSetQuotes()
Determines the location of quotes and sets them. |
Mention |
getBestMentionFor(Mention m)
Gets the canonical mention of the entity containing m . |
CExample |
getCExampleFor(Mention m1,
Mention m2)
Returns the unique CExample for the given pair of mentions
in the given order. |
java.util.Map<Entity,java.util.Map<java.lang.Integer,java.lang.String>> |
getCoherenceInfo()
Gets the coherence info using the value of usePredictedEntities() to determine whether
to use predicted entities. |
java.util.Map<Entity,java.util.Map<java.lang.Integer,java.lang.String>> |
getCoherenceInfo(boolean usePred)
Gets a grid indicating the mention type for each combination of entities and sentences. |
ChainSolution<Mention> |
getCorefChains()
Gets the partition of mentions into coreferential sets. |
java.lang.String |
getDocID()
Gets the ID for this document, as a string. |
java.util.List<Entity> |
getEntities()
Gets the entities, in no particular order. |
Entity |
getEntityFor(Mention m)
Currently implemented slowly. |
Entity |
getEntityFor(Mention m,
boolean usePred)
Currently implemented slowly. |
protected Entity |
getEntityFor(Mention m,
java.util.List<Entity> entities)
Currently implemented slowly. |
GExample |
getGExampleFor(Mention m)
Returns the unique GExample for the given mention. |
boolean |
getHeadPrediction(int firstWN,
int lastWN)
|
double |
getInCorpusInverseFreq(java.lang.String word)
Gets the inverse of the number of occurrences of the specified word in the corpus. |
double |
getInDocInverseFreq(java.lang.String word)
Gets the inverse of the number of occurrences of the specified word in the document. |
double |
getInverseTrueHeadFreq(int wordNum)
Gets the inverse true head frequency of the word at the specified position. |
double |
getInverseTrueHeadFreq(java.lang.String word)
Gets the inverse of the number of occurrences of the specified word in the heads of the true mentions in the document. |
Mention |
getMention(int n)
|
java.util.List<Mention> |
getMentions()
Gets the mentions of the document, sorted (typically in document order). |
java.util.Set<Mention> |
getMentionsContainedIn(Mention m)
Gets the set of mentions whose head is entirely contained within a specified mention's extent, including the specified mention itself. |
java.util.Set<Mention> |
getMentionsContaining(Mention m)
Gets the set of mentions whose extents entirely contain a specified mention's extent, including the specified mention itself. |
java.util.List<Mention> |
getMentionsInSent(int sentNum)
Gets a list of the mentions in a specified sentence in order. |
Pair<java.util.List<Mention>,java.util.List<Mention>> |
getMentionsInSentences(int s1,
int s2)
Gets a pair of lists of mentions, one for each of the two specified sentences. |
java.util.Set<Mention> |
getMentionsWithExtentStartingAt(int startWord)
Returns the set of mentions whose extents start at the specified word number, or an empty set if none are found. |
java.util.Set<Mention> |
getMentionsWithHeadStartingAt(int startWord)
Returns the set of mentions whose heads start at the specified word number, or an empty set if none are found. |
int |
getNumMentions()
|
int |
getNumRelations()
Gets the number of relations. |
int |
getNumSentences()
Returns the number of sentences in the document. |
java.lang.String |
getPlainText()
Gets the text that is the basis for counting, including the start/end characters in Chunk objects. |
java.util.List<java.lang.String> |
getPOS()
Gets a list of the Part-Of-Speech tags for the words of the document. |
java.lang.String |
getPOS(int posNum)
Gets the Part-Of-Speech tag for the word at the posNum
position in the document. |
java.util.List<Entity> |
getPredEntities()
Gets a list of predicted entities, in no particular order. |
Mention |
getPredMention(int n)
|
java.util.List<Mention> |
getPredMentions()
Gets a sorted list of predicted mentions. |
int |
getQuoteNestLevel(int wordNum)
Indicates the number of nested quotes the specified word is in. |
Relation |
getRelation(int number)
Gets the specified relation. |
int |
getSentNum(int wordNum)
Gets the sentence number for the specified word. |
java.lang.String |
getShortEID(java.lang.String longID)
|
int |
getStartCharNum(int wordNum)
Gets the zero-based position of the first character of a word. |
int |
getTextFirstWordNum()
Gets the word number of the first word in the main text of the document (as distinguished from headlines and metadata that may be included in the plain text.) |
java.util.List<Entity> |
getTrueEntities()
Gets a list of true entities, in no particular order. |
Mention |
getTrueMention(int n)
|
Mention |
getTrueMentionFor(Mention pred)
Gets the true mention aligned with the specified mention. |
java.util.List<Mention> |
getTrueMentions()
Gets a sorted list of true mentions. |
java.util.Map<java.lang.String,java.lang.Integer> |
getWholeDocCounts()
Gets the counts for the words in the document. |
java.lang.String |
getWord(int wordNum)
Gets the specified word. |
int |
getWordNum(int charNum)
Determines the word number (zero-based) of the word at charNum ,
or if no word is at charNum, return the word number of the closest
word appearing after charNum, or if no such word exists, return -1. |
java.util.List<java.lang.String> |
getWords()
Gets a list of the surface forms of the words of the document. |
boolean |
hasHeadPrediction(int firstWN,
int lastWN)
Checks to see whether a prediction has been stored for whether the closed interval [firstWN, lastWN] word sequence is a head. |
boolean |
hasPredEntities()
Indicates whether predicted entities are available. |
boolean |
hasPredMentions()
Indicates whether predicted mentions have been set. |
boolean |
hasTrueEntities()
Indicates whether true entities are available. |
boolean |
hasTrueMentions()
Indicates whether true mentions have been set. |
protected void |
initMembersDefault()
|
boolean |
isCaseSensitive()
Indicates whether the document is case sensitive. |
protected void |
loadChunkedText(java.lang.String filename)
Loads text that has been preprocessed offline. |
void |
loadFromText(java.lang.String plainText)
Builds the document from the given plain text, automatically splitting sentences, determining quote levels, determining part-of-speech tags, and splitting words by an automatic word-splitting algorithm. |
void |
loadFromText(java.lang.String plainText,
boolean doWordSplit,
boolean doPOSTag)
Builds the document from the given plain text, automatically splitting sentences, determining quote levels, determining part-of-speech tags, and either splitting words by whitespace or using a word-splitter. |
protected java.lang.String |
loadPOSTaggerOutput()
Loads the output of the SNoW-based POS tagger. |
protected void |
loadPOSTags(java.lang.String content)
Loads text that has been preprocessed. |
void |
loadSGMText(java.lang.String filename)
|
protected java.util.Map<Mention,Mention> |
makeBestMentionMap()
|
Chunk |
makeChunk(int startWord,
int endWord)
Create a chunk spanning the specified words in this document. |
static void |
printChunkValidity()
Verify that all mentions start and end on phrase boundaries. |
protected void |
recordWordLocation(int wn,
int startCN,
int endCN)
Records the fact that a word is located at characters startCN
through endCN (inclusive). |
protected java.lang.String |
removeTagsAndExtraNL(java.lang.String a)
|
protected java.lang.String |
repeat(java.lang.String s,
int n)
|
void |
save()
Writes the document to a file using serialization. |
void |
setCorpusCounts(java.util.Map<java.lang.String,java.lang.Integer> counts)
Sets the corpus counts for the words in the corpus. |
protected void |
setPlainText(java.lang.String text)
Should be set before words are set. |
protected void |
setPOSTags(java.util.List<java.lang.String> tags)
Sets the POS tags. |
void |
setPredEntities(ChainSolution<Mention> sol)
Sets the predicted entities to be those specified by sol . |
void |
setPredictedMentions(java.util.Collection<Mention> ments)
Sets the predicted mentions and records a preference for using them. |
void |
setQuoteLevels(java.util.List<java.lang.Integer> quoteLevels)
Sets the quote levels, which indicate the number of nested quotations in which each word is embedded. |
protected void |
setSentenceNumbers(java.util.List<java.lang.Integer> sentNums)
Sets the sentence numbers for each word. |
void |
setUsePredictedEntities(boolean usePred)
Sets the preference for using predicted entities or true entities. |
void |
setUsePredictedMentions(boolean usePred)
Sets the preference for using predicted mentions or true mentions. |
void |
setWords(java.util.List<java.lang.String> words)
|
void |
setWords(java.util.List<java.lang.String> words,
boolean backwardsCompatible)
Sets the words, aligns them with the plain text, and records statistics about them. |
protected java.util.List<Entity> |
sortEntitiesByListOrder(java.util.List<Entity> ents,
java.util.List<Entity> ordered)
Does NOT modify in place (but this may change). |
protected void |
sortPredictedMentions()
Sorts predicted mentions in natural order, which is the textual order by default. |
protected void |
sortTrueMentions()
Sorts true mentions in natural order, which is the textual order by default. |
java.lang.String |
toAnnotatedString(boolean showPOS)
Gets the document as a string annotated with mention boundaries, with square brackets for true mentions, asterisks for false alarms, and triangle brackets for missed mentions, and optionally annotated with Part-Of-Speech tags. |
java.lang.String |
toAnnotatedString(boolean showPOS,
boolean showMTypes,
boolean showETypes,
boolean showEIDs)
Gets the document as a string annotated with mention boundaries, with square brackets for true mentions, asterisks for false alarms, and triangle brackets for missed mentions, and optionally annotated with Part-Of-Speech tags, mention types, entity types, and entity IDs. |
java.lang.String |
toCoherenceTableString()
Gets the coherence grid represented as a string, laid out in a grid. |
java.lang.String |
toCoherenceTableString(boolean usePred)
Gets the coherence grid represented as a string, laid out in a grid. |
java.lang.String |
toString()
|
java.lang.String |
toSubstituteString()
Gets the document as a string where each mention has been replaced by the most specific mention coreferential with it. |
protected java.lang.String |
translateEscaped(java.lang.String escaped,
int cursor)
Translates an escaped round, square, or curly brace escaped as -LBR- or -RBR-, or an escaped pair of quotes, escaped as a double quote character. |
boolean |
usePredictedEntities()
Indicates whether requests for entities will return predicted entities or true entities. |
boolean |
usePredictedMentions()
Indicates whether requests for mentions will return predicted mentions or true mentions. |
void |
write(boolean usePredictions)
Writes this Doc in the appropriate format. |
abstract void |
write(java.lang.String filename,
boolean usePredictions)
Writes this Doc in the appropriate format. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
private static final long serialVersionUID
protected java.lang.String m_baseFN
static int totalMentions
static int goodStarts
static int goodEnds
static int medEnds
private boolean m_bUsePredEntities
private java.util.List<Entity> m_trueEntities
private java.util.List<Entity> m_predEntities
private ChainSolution<Mention> m_corefChains
private java.util.List<Relation> m_relations
private boolean m_bUsePredMentions
private java.util.List<Mention> m_trueMentions
private boolean m_trueMentionsSorted
private java.util.List<Mention> m_predMentions
private Aligner<Mention> m_defaultAligner
private java.util.Map<Mention,Mention> m_predToTrueMention
protected LBJ2.classify.Classifier m_caser
protected boolean m_bNeedsCasing
private java.util.List<java.util.List<java.lang.String>> m_phrases
protected java.lang.String m_source
protected java.lang.String m_docType
protected java.lang.String m_version
protected java.lang.String m_annotationAuthor
protected java.lang.String m_encoding
protected java.lang.String m_docID
protected java.lang.String m_slug
protected java.lang.String m_dateTime
protected java.lang.String m_headline
protected java.lang.String m_text
private int m_textStartCharNum
private java.util.List<java.lang.String> m_words
private java.util.List<java.lang.String> m_pos
private java.util.List<java.lang.Integer> m_quoteNestLevel
private java.util.Map<java.lang.String,java.lang.Integer> m_mentWordCounts
private java.util.Map<java.lang.String,java.lang.Integer> m_docWordCounts
private java.util.Map<java.lang.String,java.lang.Integer> m_corpusWordCounts
private java.lang.String m_countingText
private java.util.Map<java.lang.Integer,java.util.Set<Mention>> m_headStartWordNumMentionMap
private java.util.Map<java.lang.Integer,java.util.Set<Mention>> m_extentStartWordNumMentionMap
private java.util.Map<java.lang.Integer,java.lang.Integer> m_charWordMap
private int m_nSents
private java.util.Map<java.lang.Integer,java.lang.Integer> m_wordNumSentNumMap
private java.util.Map<java.lang.Integer,java.lang.Integer> m_wordNumCharNumMap
private java.util.Map<Pair<java.lang.Integer,java.lang.Integer>,Pair<java.util.List<Mention>,java.util.List<Mention>>> m_sentenceMentionsPair
private java.util.List<java.util.List<Mention>> m_mentsInSents
private java.util.Map<Mention,java.util.Set<Mention>> m_mentionsContaining
private java.util.Map<Mention,Mention> m_bestMentionMap
private java.util.Map<java.lang.Integer,java.util.Map<java.lang.Integer,java.lang.Boolean>> m_headPredictionMap
private java.util.Map<Pair<Mention,Mention>,CExample> m_cExMap
private java.util.Map<Mention,GExample> m_gExMap
private java.lang.Boolean m_isCaseSensitive
Constructor Detail |
---|
public DocBase()
Method Detail |
---|
protected void initMembersDefault()
public void loadSGMText(java.lang.String filename)
filename
- The file containing the text of the document.
XMLException
protected java.lang.String removeTagsAndExtraNL(java.lang.String a)
protected void loadPOSTags(java.lang.String content)
content
- The text annotated with part of speech tags.
protected java.lang.String loadPOSTaggerOutput()
protected void loadChunkedText(java.lang.String filename)
filename
- The name of a file containing the chunked text.
protected java.lang.String translateEscaped(java.lang.String escaped, int cursor)
public void calcAndSetQuotes()
public void loadFromText(java.lang.String plainText)
plainText
- The text of the document.
public void loadFromText(java.lang.String plainText, boolean doWordSplit, boolean doPOSTag)
plainText
- The text of the document.
doWordSplit
- If true, words will be split by
an automatic word-splitting algorithm; otherwise
words will be assumed to be separated by whitespace.
doPOSTag
- If true, POS tags will be generated by the LBJPOS
algorithm. Otherwise, no tags will be set.
protected void setPlainText(java.lang.String text)
text
- The plain text, used for determining character counts.
public void setWords(java.util.List<java.lang.String> words)
public void setWords(java.util.List<java.lang.String> words, boolean backwardsCompatible)
setPlainText()
has been called.
words
- The words (copied defensively).
backwardsCompatible
- Attempt to alter the algorithm
to conform to the behavior in a previously published paper.
protected void setPOSTags(java.util.List<java.lang.String> tags)
setWords()
tags
- A list of POS tags,
in the same order as the words (copied defensively).
java.lang.IllegalArgumentException
- if tags.size() != words.size()
public void setQuoteLevels(java.util.List<java.lang.Integer> quoteLevels)
setWords()
quoteLevels
- A list of quote levels,
in the same order as the words (copied defensively).
java.lang.IllegalArgumentException
- if quoteLevels.size() != words.size()
protected void setSentenceNumbers(java.util.List<java.lang.Integer> sentNums)
setWords()
sentNums
- A list of sentence numbers,
in the same order as the words (copied defensively).
java.lang.IllegalArgumentException
- if sentNums.size() != words.size()
or if sentNums
is non-monotonic.
protected void recordWordLocation(int wn, int startCN, int endCN)
startCN
through endCN
(inclusive).
public java.lang.String getPlainText()
Doc
getPlainText
in interface Doc
public java.lang.String getDocID()
Doc
getDocID
in interface Doc
public boolean isCaseSensitive()
Doc
isCaseSensitive
in interface Doc
public int getSentNum(int wordNum)
Doc
getSentNum
in interface Doc
wordNum
- the zero-based position of the word whose
sentence number is desired.
public int getNumSentences()
Doc
getNumSentences
in interface Doc
public void setUsePredictedEntities(boolean usePred)
Doc
setUsePredictedEntities
in interface Doc
usePred
- if true
, prefer to use predicted entities,
otherwise, prefer true entities.
public boolean usePredictedEntities()
Doc
usePredictedEntities
in interface Doc
public java.util.List<Entity> getEntities()
Doc
Doc.usePredictedEntities()
and predicted
entities are available, return them;
otherwise return true entities.
getEntities
in interface Doc
public java.util.List<Entity> getPredEntities()
Doc
getPredEntities
in interface Doc
public java.util.List<Entity> getTrueEntities()
Doc
getTrueEntities
in interface Doc
public ChainSolution<Mention> getCorefChains()
Doc
getCorefChains
in interface Doc
public Entity getEntityFor(Mention m)
getEntityFor
in interface Doc
m
- The mention whose entity is desired.
m
, or null if not found.
public Entity getEntityFor(Mention m, boolean usePred)
getEntityFor
in interface Doc
m
- The mention whose entity is desired.
usePred
- Whether to return a predicted entity or a true entity.
m
, or null if the entity
of the specified type is not available.
protected Entity getEntityFor(Mention m, java.util.List<Entity> entities)
protected void addTrueEntity(Entity e)
public void setPredEntities(ChainSolution<Mention> sol)
Doc
sol
.
Entity IDs are automatically created, and each mention's
setPredictedEntityID()
method is called.
Also sets usePredictedEntities to true
.
The entities are backed internally, but the mentions are not duplicated.
setPredEntities
in interface Doc
sol
- The partition of mentions from which to derive entities.
public boolean hasPredEntities()
Doc
hasPredEntities
in interface Doc
public boolean hasTrueEntities()
Doc
hasTrueEntities
in interface Doc
protected void addPredEntities(java.util.List<Entity> ents)
public CExample getCExampleFor(Mention m1, Mention m2)
Doc
CExample
for the given pair of mentions
in the given order.
Doc is the head of a collection of related examples;
as such, it needs to return the same CExample
any time
an inference-based classifier is used.
getCExampleFor
in interface Doc
m1
- The first mention.
m2
- The second mention.
CExample
referring to
the ordered pair m1, m2
.
public GExample getGExampleFor(Mention m)
Doc
GExample
for the given mention.
Doc is the head of a collection of related examples;
as such, it needs to return the same GExample
any time
an inference-based classifier is used.
getGExampleFor
in interface Doc
m
- The mention.
GExample
referring to
the mention m
.
public void setUsePredictedMentions(boolean usePred)
Doc
setUsePredictedMentions
in interface Doc
usePred
- if true
, prefer to use predicted mentions,
otherwise, prefer true mentions.
public boolean usePredictedMentions()
Doc
usePredictedMentions
in interface Doc
public java.util.List<Mention> getMentions()
Doc
usePredictedMentions()
.
getMentions
in interface Doc
public java.util.List<Mention> getPredMentions()
Doc
getPredMentions
in interface Doc
public java.util.List<Mention> getTrueMentions()
Doc
getTrueMentions
in interface Doc
public boolean hasPredMentions()
Doc
hasPredMentions
in interface Doc
public boolean hasTrueMentions()
Doc
hasTrueMentions
in interface Doc
public void setPredictedMentions(java.util.Collection<Mention> ments)
Doc
setPredictedMentions
in interface Doc
ments
- The predicted mentions (copied defensively).
protected void alignPredMentsToTrue()
protected void addTrueMention(Mention m)
public int getNumMentions()
protected void sortTrueMentions()
Mention.compareTo(Mention)
protected void sortPredictedMentions()
Mention.compareTo(Mention)
public Mention getMention(int n)
public Mention getPredMention(int n)
public Mention getTrueMention(int n)
public Mention getTrueMentionFor(Mention pred)
Doc
getTrueMentionFor
in interface Doc
pred
- A predicted mention.
pred
.
public Mention getBestMentionFor(Mention m)
Doc
m
.
getBestMentionFor
in interface Doc
m
- A mention.
m
.
public java.util.Set<Mention> getMentionsWithHeadStartingAt(int startWord)
Doc
getMentionsWithHeadStartingAt
in interface Doc
startWord
- A word number.
startWord
.
public java.util.Set<Mention> getMentionsWithExtentStartingAt(int startWord)
Doc
getMentionsWithExtentStartingAt
in interface Doc
startWord
- A word number.
startWord
.
public java.util.Set<Mention> getMentionsContainedIn(Mention m)
Doc
getMentions()
.
getMentionsContainedIn
in interface Doc
m
- The specified mention.
m
.
public java.util.Set<Mention> getMentionsContaining(Mention m)
Doc
getMentions()
.
getMentionsContaining
in interface Doc
m
- The specified mention.
m
.
Returns predicted or true mentions according to what getMentions()
returns.
protected void buildMentionsContaining()
public java.util.List<Mention> getMentionsInSent(int sentNum)
Doc
usePredictedMentions()
.
getMentionsInSent
in interface Doc
sentNum
- The number of the specified sentence.
protected void buildMentionsInSents()
public Pair<java.util.List<Mention>,java.util.List<Mention>> getMentionsInSentences(int s1, int s2)
Doc
getMentionsInSentences
in interface Doc
s1
- The number of the first sentence.
s2
- The number of the second sentence.
public Chunk makeChunk(int startWord, int endWord)
Doc
makeChunk
in interface Doc
startWord
- The position of the first word in the desired chunk.
endWord
- The position of the last word in the desired chunk.
public java.util.List<java.lang.String> getWords()
Doc
getWords
in interface Doc
public java.lang.String getWord(int wordNum)
Doc
getWord
in interface Doc
wordNum
- The position of the specified word
(as an index into a List
).
wordNum
th word as a string.
public java.util.List<java.lang.String> getPOS()
Doc
getPOS
in interface Doc
POSTagger
public java.lang.String getPOS(int posNum)
Doc
posNum
position in the document.
getPOS
in interface Doc
posNum
- The position of the word whose POS tag should be returned.
POSTagger
public int getWordNum(int charNum)
Doc
charNum
,
or if no word is at charNum, return the word number of the closest
word appearing after charNum, or if no such word exists, return -1.
getWordNum
in interface Doc
charNum
- The character number.
public int getTextFirstWordNum()
Doc
getTextFirstWordNum
in interface Doc
public int getStartCharNum(int wordNum)
Doc
getStartCharNum
in interface Doc
wordNum
- The zero-based position of the word in the document.
-1
if wordNum
is invalid.
public int getQuoteNestLevel(int wordNum)
Doc
getQuoteNestLevel
in interface Doc
wordNum
- The position of the specified word.
public double getInverseTrueHeadFreq(int wordNum)
Doc
getInverseTrueHeadFreq
in interface Doc
wordNum
- The position in the document of the specified word.
Doc.getInverseTrueHeadFreq(String)
public double getInverseTrueHeadFreq(java.lang.String word)
Doc
getInverseTrueHeadFreq
in interface Doc
word
- The specified word.
public double getInDocInverseFreq(java.lang.String word)
Doc
getInDocInverseFreq
in interface Doc
word
- The specified word.
public double getInCorpusInverseFreq(java.lang.String word)
Doc
getInCorpusInverseFreq
in interface Doc
word
- The specified word.
public java.util.Map<java.lang.String,java.lang.Integer> getWholeDocCounts()
Doc
getWholeDocCounts
in interface Doc
public void setCorpusCounts(java.util.Map<java.lang.String,java.lang.Integer> counts)
Doc
setCorpusCounts
in interface Doc
counts
- A map from words to counts of words in the corpus.
public int getNumRelations()
Doc
getNumRelations
in interface Doc
public Relation getRelation(int number)
Doc
getRelation
in interface Doc
number
- the number of the desired relation.
protected void addRelation(Relation r)
public boolean hasHeadPrediction(int firstWN, int lastWN)
public boolean getHeadPrediction(int firstWN, int lastWN)
public void addHeadPrediction(int firstWN, int lastWN, boolean pred)
public java.lang.String toString()
toString
in class java.lang.Object
public java.lang.String toAnnotatedString(boolean showPOS, boolean showMTypes, boolean showETypes, boolean showEIDs)
Doc
toAnnotatedString
in interface Doc
showPOS
- Whether the Part-Of-Speech tags should be shown.
showMTypes
- Whether mention types should be shown.
showETypes
- Whether entity types should be shown.
showEIDs
- Whether entity IDs should be shown.
public java.lang.String toAnnotatedString(boolean showPOS)
Doc
toAnnotatedString
in interface Doc
showPOS
- Whether the Part-Of-Speech tags should be shown.
public java.lang.String toSubstituteString()
Doc
toSubstituteString
in interface Doc
getMentions()
.
protected java.util.Map<Mention,Mention> makeBestMentionMap()
public java.util.Map<Entity,java.util.Map<java.lang.Integer,java.lang.String>> getCoherenceInfo(boolean usePred)
Doc
getCoherenceInfo
in interface Doc
usePred
- Whether predicted entities should be used.
public java.util.Map<Entity,java.util.Map<java.lang.Integer,java.lang.String>> getCoherenceInfo()
Doc
usePredictedEntities()
to determine whether
to use predicted entities.
getCoherenceInfo
in interface Doc
public java.lang.String toCoherenceTableString(boolean usePred)
Doc
toCoherenceTableString
in interface Doc
Doc.getCoherenceInfo()
public java.lang.String toCoherenceTableString()
Doc
usePredictedEntities()
.
toCoherenceTableString
in interface Doc
Doc.getCoherenceInfo()
protected java.lang.String repeat(java.lang.String s, int n)
protected java.util.List<Entity> sortEntitiesByListOrder(java.util.List<Entity> ents, java.util.List<Entity> ordered)
public java.lang.String getShortEID(java.lang.String longID)
public void save() throws java.io.IOException
Doc
save
in interface Doc
java.io.IOException
public void write(boolean usePredictions)
Doc
write
in interface Doc
usePredictions
- Whether predicted mentions and entities
should be written.
public abstract void write(java.lang.String filename, boolean usePredictions)
Doc
write
in interface Doc
filename
- The name of the target file.
usePredictions
- Whether predicted mentions and entities
should be written.
public static void printChunkValidity()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |