|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
edu.illinois.cs.cogcomp.lbj.coref.ir.docs.DocBase
edu.illinois.cs.cogcomp.lbj.coref.ir.docs.DocXMLBase
public abstract class DocXMLBase
The superclass of documents loaded from XML.
Nested Class Summary |
---|
Nested classes/interfaces inherited from class edu.illinois.cs.cogcomp.lbj.coref.ir.docs.DocBase |
---|
DocBase.PosSource |
Field Summary | |
---|---|
private static long |
serialVersionUID
|
Fields inherited from class edu.illinois.cs.cogcomp.lbj.coref.ir.docs.DocBase |
---|
goodEnds, goodStarts, m_annotationAuthor, m_baseFN, m_bNeedsCasing, m_caser, m_dateTime, m_docID, m_docType, m_encoding, m_headline, m_slug, m_source, m_text, m_version, medEnds, totalMentions |
Constructor Summary | |
---|---|
DocXMLBase()
Basic constructor: Not recommended. |
|
DocXMLBase(java.lang.String filename,
java.lang.String ext)
Given the name of a file and the extension, loads the file and reads in the XML representation. |
|
DocXMLBase(java.lang.String baseFilename,
java.lang.String ext,
LBJ2.classify.Classifier caser)
|
|
DocXMLBase(java.lang.String filename,
java.lang.String ext,
DocBase.PosSource posSource)
Given the name of a file and the extension, loads the file and reads in the XML representation. |
Method Summary | |
---|---|
private Chunk |
findAndProcessChunk(org.w3c.dom.Element parent,
java.lang.String tagName)
Find and load a chunk. |
private boolean |
foundPredEnt(java.lang.String eID)
|
protected abstract java.lang.String |
getBaseFilename(java.lang.String filename)
Trim possible extension from file. |
protected java.lang.String |
getOptAttrib(org.w3c.dom.NamedNodeMap attribs,
java.lang.String attribName,
java.lang.String defaultResult)
|
java.lang.String |
getShortEID(java.lang.String longID)
|
protected abstract Entity |
loadEntity(org.w3c.dom.Node nEntity)
|
protected Relation |
loadRelation(org.w3c.dom.Element node)
Loads a Relation from an xml representation and returns it. |
void |
loadXML(java.lang.String filename)
|
protected java.util.List<Chunk> |
processAttributes(org.w3c.dom.Element parent,
java.lang.String attrName)
Gets all Chunks found inside parent with nodeName attrName. |
protected abstract Chunk |
processChunk(org.w3c.dom.Element element)
Load a chunk. |
protected Mention |
processEntityMention(org.w3c.dom.Element node,
java.lang.String entityID,
java.lang.String entityType,
java.lang.String subtype,
java.lang.String specificity)
Process a mentionType_mention tag. |
private RelationEntityArgument |
processRelationEntityArgument(org.w3c.dom.Element node)
|
private RelationMention |
processRelationMention(org.w3c.dom.Element element)
|
private RelationMentionArgument |
processRelationMentionArgument(org.w3c.dom.Element node)
|
protected abstract java.lang.String |
toXMLString(Chunk c)
|
protected java.lang.String |
toXMLString(Mention m,
java.lang.String linePrefix)
|
protected java.lang.String |
toXMLString(Relation r)
|
private java.lang.String |
toXMLString(RelationEntityArgument a,
int argNum)
|
private java.lang.String |
toXMLString(RelationMentionArgument a,
java.lang.String linePrefix)
|
private java.lang.String |
toXMLString(RelationMention m,
java.lang.String linePrefix)
|
protected java.lang.String |
toXMLString(java.lang.String plainText)
Converts plain text to XML safe format by escaping ampersands. |
abstract void |
write(boolean usePredictions)
Writes this Doc in the appropriate format. |
abstract void |
write(java.lang.String filenameBase,
boolean usePredictions)
Writes this Doc in the appropriate format. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
private static final long serialVersionUID
Constructor Detail |
---|
public DocXMLBase()
public DocXMLBase(java.lang.String filename, java.lang.String ext) throws XMLException
filename - The filename, which may or may not end with ext.
ext - The extension of the filename, without a leading period.
XMLException
public DocXMLBase(java.lang.String filename, java.lang.String ext, DocBase.PosSource posSource) throws XMLException
filename - The filename, which may or may not end with ext.
ext - The extension of the filename, without a leading period.
posSource - If PosSource.FILE, attempts to make the system more exactly
reproduce the previously published results.
This requires a corpus that is preprocessed offline using
CogComp preprocessing tools available at
http://L2R.cs.uiuc.edu/~cogcomp
If PosSource.SNOW, uses a local SNoW-based preprocessor
called tagger, located in the PATH_POS environment variable
(which must be exported).
This is generally slow.
Otherwise, uses the LBJ preprocessor (fastest, but performance may differ
from published results).
XMLException
public DocXMLBase(java.lang.String baseFilename, java.lang.String ext, LBJ2.classify.Classifier caser) throws XMLException
XMLException
Method Detail |
---|
public void loadXML(java.lang.String filename) throws XMLException
filename
- file to load containing xml representation.
XMLException
protected abstract Entity loadEntity(org.w3c.dom.Node nEntity) throws XMLException
XMLException
protected Relation loadRelation(org.w3c.dom.Element node) throws XMLException
XMLException
protected java.util.List<Chunk> processAttributes(org.w3c.dom.Element parent, java.lang.String attrName) throws XMLException
Gets all Chunks found inside parent with nodeName attrName.
parent - Parent of children that have name attrName.
attrName - Name of children to extract.
XMLException
protected Mention processEntityMention(org.w3c.dom.Element node, java.lang.String entityID, java.lang.String entityType, java.lang.String subtype, java.lang.String specificity) throws XMLException
Process a mentionType_mention tag. Must not be
called until counting texts and word split texts have been processed.
node - A mentionType_mention node.
entityID - The ID of the entity that this mentions.
specificity - The specificity ("SPC" or "GEN") of the mention.
entityType - The entity-type.
subtype - The entity-type subtype.
XMLException - If the XML cannot be processed.
private RelationMention processRelationMention(org.w3c.dom.Element element) throws XMLException
XMLException
private RelationMentionArgument processRelationMentionArgument(org.w3c.dom.Element node) throws XMLException
XMLException
private RelationEntityArgument processRelationEntityArgument(org.w3c.dom.Element node)
protected java.lang.String getOptAttrib(org.w3c.dom.NamedNodeMap attribs, java.lang.String attribName, java.lang.String defaultResult)
private Chunk findAndProcessChunk(org.w3c.dom.Element parent, java.lang.String tagName) throws XMLException
parent - Parent of Node with name tagName.
tagName - tagName of desired chunk.
XMLException
protected abstract Chunk processChunk(org.w3c.dom.Element element) throws XMLException
element
- An element containing a charseq Element.
XMLException
public java.lang.String getShortEID(java.lang.String longID)
getShortEID
in class DocBase
public abstract void write(boolean usePredictions)
Doc
write
in interface Doc
write
in class DocBase
usePredictions - Whether predicted mentions and entities
should be written.
public abstract void write(java.lang.String filenameBase, boolean usePredictions)
Doc
write
in interface Doc
write
in class DocBase
filenameBase - The name of the target file.
usePredictions - Whether predicted mentions and entities
should be written.
protected abstract java.lang.String getBaseFilename(java.lang.String filename)
protected java.lang.String toXMLString(Mention m, java.lang.String linePrefix)
protected abstract java.lang.String toXMLString(Chunk c)
protected java.lang.String toXMLString(java.lang.String plainText)
protected java.lang.String toXMLString(Relation r)
private boolean foundPredEnt(java.lang.String eID)
private java.lang.String toXMLString(RelationEntityArgument a, int argNum)
private java.lang.String toXMLString(RelationMention m, java.lang.String linePrefix)
private java.lang.String toXMLString(RelationMentionArgument a, java.lang.String linePrefix)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |