/*
* LingPipe v. 3.7
* Copyright (C) 2003-2008 Alias-i
*
* This program is licensed under the Alias-i Royalty Free License
* Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i
* Royalty Free License Version 1 for more details.
*
* You should have received a copy of the Alias-i Royalty Free License
* Version 1 along with this program; if not, visit
* http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
* Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
* +1 (718) 290-9170.
*/
package com.aliasi.chunk;
import com.aliasi.util.BoundedPriorityQueue;
import com.aliasi.util.ScoredObject;
import com.aliasi.util.Strings;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
// NOTE(review): the comment below reads as class-level documentation followed
// by the documentation of rescore(Chunking); no class declaration is visible
// before this method in this view -- confirm against the upstream source.
/**
 * A {@code RescoringChunker} provides first best, n-best and
 * confidence chunking by rescoring n-best chunkings derived from a
 * contained chunker.
 *
 * <p>Concrete subclasses must implement the abstract method {@link
 * #rescore(Chunking)}, which provides a score for a chunking. There
 * are no restrictions on how this score is computed; most typically,
 * it will be a longer-distance/higher-order model than the contained
 * chunker and provide more accurate results.
 *
 * <p>The n-best chunker works by generating the top analyses from the
 * contained chunker. The number of such analyses considered is
 * determined in the constructor for this class. These are then
 * placed in a bounded priority queue with the bound determined by the
 * maximum specified in the call to {@link
 * #nBest(char[],int,int,int)}.
 *
 * <p>The first-best chunker methods {@link #chunk(CharSequence)} and
 * {@link #chunk(char[],int,int)} operate by choosing the top scoring
 * chunking from the rescoring of the contained chunker. The number
 * of chunkings from the contained chunker that are rescored is
 * determined in the constructor. This is more memory and time
 * efficient than running the n-best chunking.
 *
 * <p>The rescoring should be in the form of log (base 2) joint
 * probability estimate for the specified chunking. For the
 * simple whole-analysis rescoring method {@link
 * #nBest(char[],int,int,int)}, this is not checked, and any
 * values may be used in practice. For the n-best chunk method
 * {@link #nBestChunks(char[],int,int,int)}, the scores are
 * treated as log probabilities, but renormalized in order to
 * compute conditional chunk probability estimates.
 *
 * @param chunking Chunking to rescore.
 * @return The new score for this chunking.
 */
public abstract double rescore(Chunking chunking);
/**
 * Returns the base chunker that supplies the hypotheses this class
 * works from. The reference handed back is the very object held by
 * this instance (no copy is made), so reconfiguring it also changes
 * how this chunker behaves.
 *
 * @return The base chunker.
 */
public B baseChunker() {
    return this.mChunker;
}
/**
 * Returns the current number of chunkings requested from the base
 * chunker for rescoring.
 *
 * @return The number of base chunkings to rescore.
 */
public int numChunkingsRescored() {
    return this.mNumChunkingsRescored;
}
/**
 * Replaces the number of chunkings requested from the base chunker
 * for rescoring. The value is stored exactly as given, without any
 * range checking, and is read by later chunking calls when they
 * query the base chunker.
 *
 * @param numChunkingsRescored Number of base chunkings to
 * rescore.
 */
public void setNumChunkingsRescored(int numChunkingsRescored) {
    this.mNumChunkingsRescored = numChunkingsRescored;
}
/**
 * Computes the first-best chunking of a character sequence. The
 * sequence is converted to a character array, and the whole array
 * is then passed to {@link #chunk(char[],int,int)}.
 *
 * @param cSeq Character sequence to chunk.
 * @return First-best chunking of the specified character sequence.
 */
public Chunking chunk(CharSequence cSeq) {
    final char[] chars = Strings.toCharArray(cSeq);
    final int sliceStart = 0;
    final int sliceEnd = chars.length;
    return chunk(chars, sliceStart, sliceEnd);
}
/**
 * Computes the first-best chunking of a character slice. Up to the
 * configured number of n-best chunkings are requested from the base
 * chunker for the slice, and the result of passing them to
 * {@code firstBest} is returned.
 *
 * @param cs Underlying character array.
 * @param start Index of first character to analyze.
 * @param end Index of one past the last character to analyze.
 * @return First-best chunking of the specified character slice.
 */
public Chunking chunk(char[] cs, int start, int end) {
    // Name the two pieces of instance state involved before the call.
    final B hypothesisSource = mChunker;
    final int maxHypotheses = mNumChunkingsRescored;
    return firstBest(hypothesisSource.nBest(cs, start, end, maxHypotheses));
}
/**
* Returns the n-best chunkings of the specified character slice.
* See the class documentation above for implementation details.
*
* @param cs Underlying character array.
* @param start Index of first character to analyze.
* @param end Index of one past the last character to analyze.
* @return Iterator over the n-best chunkings of the specified
* character slice.
*/
public Iterator See the class documentation above for implementation details.
*
* @param cs Underlying characters.
* @param start Index of first character in slice.
* @param end Index of one past last character in slice.
* @param maxNBest Maximum number of chunks to return.
*/
public Iterator