edu.illinois.cs.cogcomp.lbj.coref.ir.examples
Class CaseExample
java.lang.Object
edu.illinois.cs.cogcomp.lbj.coref.ir.examples.Example
edu.illinois.cs.cogcomp.lbj.coref.ir.examples.CaseExample
public class CaseExample
- extends Example
Method Summary |
java.lang.String |
getDictCase()
Returns getWordCase(), or "newWord" if the word is not in the word list. |
java.lang.String |
getDictCase(java.lang.String word)
|
java.lang.String |
getPOS()
|
java.lang.String |
getPOS(int offset)
|
java.lang.String |
getWord()
|
java.lang.String |
getWord(int offset)
|
java.lang.String |
getWordCase()
Returns "allLower", "firstCap", "allCaps", "multiCase", "digit", "punc", or "other".
A single-character uppercase word returns "firstCap".
Zero-length words are "other". |
java.lang.String |
getWordCase(java.lang.String word)
|
java.util.Map<java.lang.String,java.lang.String> |
loadWordList(java.lang.String filename)
|
Methods inherited from class edu.illinois.cs.cogcomp.lbj.coref.ir.examples.Example |
getLabel |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
m_wordList
public static java.util.Map<java.lang.String,java.lang.String> m_wordList
m_doc
public Doc m_doc
m_wordN
public int m_wordN
m_numWords
public int m_numWords
CaseExample
public CaseExample(Doc doc,
int wordN)
getWord
public java.lang.String getWord()
getWord
public java.lang.String getWord(int offset)
getPOS
public java.lang.String getPOS()
getPOS
public java.lang.String getPOS(int offset)
getDictCase
public java.lang.String getDictCase()
- Returns getWordCase(), or "newWord" if the word is not in the word list.
getDictCase
public java.lang.String getDictCase(java.lang.String word)
getWordCase
public java.lang.String getWordCase()
- Returns "allLower", "firstCap", "allCaps", "multiCase", "digit", "punc", or "other".
A single-character uppercase word returns "firstCap".
Zero-length words are "other".
Words beginning with a digit are "digit". This implies that words
containing internal digits can still be "allLower" or "allCaps".
Words beginning with punctuation are "punc"; word-internal punctuation is
not considered "punc".
Words beginning with whitespace are "other".
getWordCase
public java.lang.String getWordCase(java.lang.String word)
loadWordList
public java.util.Map<java.lang.String,java.lang.String> loadWordList(java.lang.String filename)