package uk.ac.gla.terrier.structures.indexing.singlepass.hadoop; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.log4j.Logger; import uk.ac.gla.terrier.structures.indexing.singlepass.FieldPosting; import uk.ac.gla.terrier.structures.indexing.singlepass.Posting; public class InvertedIndexRecordReader implements RecordReader{ /** logger for this class */ protected static final Logger logger = Logger.getLogger(InvertedIndexRecordReader.class); /** the configuration of this job */ protected Configuration config; /** the split **/ protected InvertedIndexSplit split; /** the current term in the inverted index **/ int[][] s; /** started reading **/ boolean started; public InvertedIndexRecordReader(JobConf conf, InvertedIndexSplit _split) throws IOException { config = conf; split = _split; started = false; } @Override public void close() throws IOException { } @Override public Text createKey() { return new Text(); } @Override public Posting createValue() { return new Posting(); } @Override public long getPos() throws IOException { return split.getIndex(); } @Override public float getProgress() throws IOException { return split.getLength()/split.getNumDocs(); } @Override public boolean next(Text term, Posting post) throws IOException { // set started to avoid calling close before reading begins started = true; // get the documents for the current term // and if there arn't any return false if (!split.next()) return false; s = split.getNextDocuments(); // convert the documents into a Posting or field Posting as required Posting a; if (split.useFieldInformation) { a = new FieldPosting(); for (int i = 0; i