Index: src/uk/ac/gla/terrier/utility/ApplicationSetup.java
===================================================================
--- src/uk/ac/gla/terrier/utility/ApplicationSetup.java	(revision 2526)
+++ src/uk/ac/gla/terrier/utility/ApplicationSetup.java	(working copy)
@@ -230,7 +230,7 @@
 	 * property is <tt>if.suffix</tt> and by default
 	 * the value of this property is <tt>.if</tt>
 	 */
-	public static String IFSUFFIX;
+	//public static String IFSUFFIX;
 	
 	/**
 	 * The suffix of the file that contains the
@@ -238,7 +238,7 @@
 	 * <tt>lexicon.suffix</tt> and by default 
 	 * the value of this property is <tt>.lex</tt>
 	 */
-	public static String LEXICONSUFFIX;
+	//public static String LEXICONSUFFIX;
 	
 	/**
 	 * The suffix of the file that contains the
@@ -255,11 +255,11 @@
 	 * property is <tt>lexicon.index.suffix</tt> and
 	 * by default its value is .lexid.
 	 */
-	public static String LEXICON_INDEX_SUFFIX;
+	//public static String LEXICON_INDEX_SUFFIX;
 
 	/** The suffix of the lexicon hash file. Corresponding property
      * is <tt>lexicon.hash.suffix</tt>, default is ".lexhash". */
-	public static String LEXICON_HASH_SUFFIX;
+	//public static String LEXICON_HASH_SUFFIX;
 
 	
 	/**
@@ -289,7 +289,7 @@
 	 * lexicon files. It corresponds to the property 
 	 * <tt>merge.prefix</tt> and the default value is <tt>MRG_</tt>.
 	 */
-	public static String MERGE_PREFIX;
+	//public static String MERGE_PREFIX;
 	
 	/**
 	 * A progressive number which is assigned to the 
@@ -299,7 +299,7 @@
 	 * the property <tt>merge.temp.number</tt> and the default value
 	 * is <tt>100000</tt>
 	 */
-	public static int MERGE_TEMP_NUMBER;
+	//public static int MERGE_TEMP_NUMBER;
 	
 	/**
 	 * The number of documents to be processed as a group during indexing.
@@ -308,7 +308,7 @@
 	 * create a single lexicon. It corresponds to the property 
 	 * <tt>bundle.size</tt> and the default value is <tt>2000</tt>.
 	 */
-	public static int BUNDLE_SIZE;
+	//public static int BUNDLE_SIZE;
 	
 	/**
 	 * The number of bytes used to store a term. Corresponds to MAX_TERM_LENGTH
@@ -346,16 +346,16 @@
 	public static String TERRIER_INDEX_PREFIX;
 	
 	/** The filename of the inverted file.*/
-	public static String INVERTED_FILENAME;
+	//public static String INVERTED_FILENAME;
 	/** The filename of the direct file.*/
 	public static String DIRECT_FILENAME;
 	/** The filename of the document index.*/
 	public static String DOCUMENT_INDEX_FILENAME;
 	/** The filename of the lexicon file.*/
-	public static String LEXICON_FILENAME;
+	//public static String LEXICON_FILENAME;
 	
 	/** The filename of the lexicon index file.*/
-	public static String LEXICON_INDEX_FILENAME;
+	//public static String LEXICON_INDEX_FILENAME;
 	/** The filename of the log (statistics) file.*/
 	public static String LOG_FILENAME;
 	
@@ -532,10 +532,10 @@
 		//The following properties specify the filenames and suffixes
 		COLLECTION_SPEC = makeAbsolute(getProperty("collection.spec", "collection.spec"), TERRIER_ETC);
 	
-		IFSUFFIX = getProperty("if.suffix", ".if");
-		LEXICONSUFFIX = getProperty("lexicon.suffix", ".lex");
-		LEXICON_INDEX_SUFFIX = getProperty("lexicon.index.suffix", ".lexid");
-		LEXICON_HASH_SUFFIX = getProperty("lexicon.hash.suffix",".lexhash");
+		//IFSUFFIX = getProperty("if.suffix", ".if");
+		//LEXICONSUFFIX = getProperty("lexicon.suffix", ".lex");
+		//LEXICON_INDEX_SUFFIX = getProperty("lexicon.index.suffix", ".lexid");
+		//LEXICON_HASH_SUFFIX = getProperty("lexicon.hash.suffix",".lexhash");
 		DOC_INDEX_SUFFIX = getProperty("doc.index.suffix", ".docid");
 		LOG_SUFFIX = getProperty("log.suffix", ".log");
 		DF_SUFFIX = getProperty("df.suffix", ".df");
@@ -545,8 +545,8 @@
 		//documents. The prefix mergepref and and the number prog.nr 
 		//specify the names of the temporary lexicon created 
 		//during creating a global lexicon.
-		MERGE_PREFIX = getProperty("merge.prefix", "MRG_");
-		MERGE_TEMP_NUMBER = Integer.parseInt(getProperty("merge.temp.number", "100000"));
+		//MERGE_PREFIX = getProperty("merge.prefix", "MRG_");
+		//MERGE_TEMP_NUMBER = Integer.parseInt(getProperty("merge.temp.number", "100000"));
 		
 		//if a document is empty, that is it does not contain any terms, 
 		//we have the option to add it to the index, or not. By default, 
@@ -555,7 +555,7 @@
 		
 		//During the indexing process, we process and create temporary structures
 		//for bundle.size files.
-		BUNDLE_SIZE = Integer.parseInt(getProperty("bundle.size", "2000"));
+		//BUNDLE_SIZE = Integer.parseInt(getProperty("bundle.size", "2000"));
 		
 		//the maximum size of a term (string)
 		MAX_TERM_LENGTH = Integer.parseInt(getProperty("max.term.length", "20"));
@@ -693,11 +693,11 @@
 	 */
 	public static void setupFilenames() {
 		String filenameTemplate = TERRIER_INDEX_PATH + FILE_SEPARATOR + TERRIER_INDEX_PREFIX;
-		INVERTED_FILENAME =filenameTemplate + IFSUFFIX;
+		//INVERTED_FILENAME =filenameTemplate + IFSUFFIX;
 		DIRECT_FILENAME = filenameTemplate + DF_SUFFIX;
 		DOCUMENT_INDEX_FILENAME = filenameTemplate + DOC_INDEX_SUFFIX;
-		LEXICON_FILENAME = filenameTemplate + LEXICONSUFFIX;
-		LEXICON_INDEX_FILENAME = filenameTemplate + LEXICON_INDEX_SUFFIX;
+		//LEXICON_FILENAME = filenameTemplate + LEXICONSUFFIX;
+		//LEXICON_INDEX_FILENAME = filenameTemplate + LEXICON_INDEX_SUFFIX;
 		LOG_FILENAME = filenameTemplate + LOG_SUFFIX;
 	}
 
Index: src/uk/ac/gla/terrier/indexing/BlockIndexer.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/BlockIndexer.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/BlockIndexer.java	(working copy)
@@ -26,9 +26,11 @@
  * Rodrygo Santo <rodrygo{a.}dcs.gla.ac.uk>
  */
 package uk.ac.gla.terrier.indexing;
+import gnu.trove.THashSet;
+
 import java.io.IOException;
 import java.util.Set;
-import gnu.trove.THashSet;
+
 import uk.ac.gla.terrier.structures.FilePosition;
 import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.indexing.BlockDirectIndexBuilder;
@@ -38,8 +40,6 @@
 import uk.ac.gla.terrier.structures.indexing.DocumentIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.DocumentPostingList;
 import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
-import uk.ac.gla.terrier.structures.indexing.UTFBlockInvertedIndexBuilder;
-import uk.ac.gla.terrier.structures.indexing.UTFBlockLexiconBuilder;
 import uk.ac.gla.terrier.terms.TermPipeline;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
@@ -281,14 +281,7 @@
 			(Boolean.parseBoolean(ApplicationSetup.getProperty("block.delimiters.enabled", "false"))
 			? " delimited-block indexing enabled" : ""));
 		currentIndex = Index.createNewIndex(path, prefix);
-		if (UTFIndexing)
-		{
-			lexiconBuilder = new UTFBlockLexiconBuilder(currentIndex);
-		}
-		else
-		{
-			lexiconBuilder = new BlockLexiconBuilder(currentIndex);
-		}
+		lexiconBuilder = new BlockLexiconBuilder(currentIndex, "lexicon");
 		directIndexBuilder = new BlockDirectIndexBuilder(currentIndex);
 		docIndexBuilder = new DocumentIndexBuilder(currentIndex);
 		//int LexiconCount = 0;
@@ -412,16 +405,8 @@
 			return;
 		}
 
-		if (UTFIndexing)
-		{
-			logger.info("Started building the utf block inverted index...");
-			invertedIndexBuilder = new UTFBlockInvertedIndexBuilder(currentIndex);
-		}
-		else
-		{
 			logger.info("Started building the block inverted index...");
-			invertedIndexBuilder = new BlockInvertedIndexBuilder(currentIndex);
-		}
+		invertedIndexBuilder = new BlockInvertedIndexBuilder(currentIndex, "inverted");
 		invertedIndexBuilder.createInvertedIndex();
 		this.finishedInvertedIndexBuild();
 		currentIndex.flush();
@@ -459,14 +444,7 @@
 	/** Hook method, called when the inverted index is finished - ie the lexicon is finished */
 	protected void finishedInvertedIndexBuild()
 	{
-		if (Boolean.parseBoolean(ApplicationSetup.getProperty("lexicon.use.hash","true"))) {
-			logger.debug("Building lexicon hash");
-			try{
-				LexiconBuilder.createLexiconHash(currentIndex);
-			} catch (IOException ioe) {
-				logger.warn("Problem creating (optional) Lexicon Hash", ioe);
-			}
-		}
+		LexiconBuilder.optimise(currentIndex, "lexicon");
 	}
 
 	
Index: src/uk/ac/gla/terrier/indexing/hadoop/Hadoop_BlockSinglePassIndexer.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/hadoop/Hadoop_BlockSinglePassIndexer.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/hadoop/Hadoop_BlockSinglePassIndexer.java	(working copy)
@@ -265,7 +265,7 @@
 		try{
 			tempRM.setBos(new BitOutputStream(
 					currentIndex.getPath() + ApplicationSetup.FILE_SEPARATOR
-					+ currentIndex.getPrefix() + ApplicationSetup.IFSUFFIX ));
+					+ currentIndex.getPrefix() + ".inverted.bf" ));
 		} catch (IOException ioe) {
 			ioe.printStackTrace();
 		}
Index: src/uk/ac/gla/terrier/indexing/hadoop/Hadoop_BasicSinglePassIndexer.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/hadoop/Hadoop_BasicSinglePassIndexer.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/hadoop/Hadoop_BasicSinglePassIndexer.java	(working copy)
@@ -30,9 +30,9 @@
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedList;
-import java.util.ArrayList;
 
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -50,10 +50,12 @@
 import uk.ac.gla.terrier.compression.BitOutputStream;
 import uk.ac.gla.terrier.indexing.BasicSinglePassIndexer;
 import uk.ac.gla.terrier.indexing.Document;
+import uk.ac.gla.terrier.structures.BasicLexiconEntry;
 import uk.ac.gla.terrier.structures.DocumentIndexInputStream;
 import uk.ac.gla.terrier.structures.FilePosition;
 import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexiconOutputStream;
 import uk.ac.gla.terrier.structures.indexing.DocumentIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.DocumentPostingList;
 import uk.ac.gla.terrier.structures.indexing.singlepass.FieldPostingInRun;
@@ -63,8 +65,8 @@
 import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.HadoopRunWriter;
 import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.HadoopRunsMerger;
 import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.MapData;
-import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.MapEmittedTerm;
 import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.MapEmittedPostingList;
+import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.MapEmittedTerm;
 import uk.ac.gla.terrier.structures.indexing.singlepass.hadoop.SimpleDocumentIndexBuilder;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
@@ -348,7 +350,7 @@
 	 */
 	
 	/** OutputStream for the Lexicon*/ 
-	protected LexiconOutputStream lexstream;
+	protected LexiconOutputStream<String> lexstream;
 	/** runIterator factory being used to generate RunIterators */
 	protected HadoopRunIteratorFactory runIteratorF = null;
 	/** records whether the reduce() has been called for the first time */
@@ -434,11 +436,11 @@
 	 * flushed.
 	 * @param mapData - info about the runs(maps) and the flushes
 	 */
-	public void startReduce(LinkedList<MapData> mapData)
+	public void startReduce(LinkedList<MapData> mapData) throws IOException
 	{
 		logger.info("The number of Reduce Tasks being used : "+jc.getNumReduceTasks());
 		((HadoopRunsMerger)(super.merger)).beginMerge(mapData);
-		lexstream = createLexiconOutputStream(currentIndex.getPath(), currentIndex.getPrefix());
+		lexstream = new FSOMapFileLexiconOutputStream(this.currentIndex, "lexicon", BasicLexiconEntry.Factory.class);
 		// Tell the merger how many to Reducers to merge for
 		((HadoopRunsMerger) merger).setNumReducers(jc.getNumReduceTasks());
 	}
@@ -523,13 +525,13 @@
 		currentIndex.addIndexStructure(
 				"inverted",
 				invertedIndexClass,
-				"uk.ac.gla.terrier.structures.Lexicon,java.lang.String,java.lang.String",
-				"lexicon,path,prefix");
+				"uk.ac.gla.terrier.structures.Index,java.lang.String", 
+				"index,structureName");
 		currentIndex.addIndexStructureInputStream(
 	            "inverted",
-	            invertedIndexInputStreamClass,
-	            "java.lang.String,java.lang.String,uk.ac.gla.terrier.structures.LexiconInputStream",
-	            "path,prefix,lexicon-inputstream");
+                "uk.ac.gla.terrier.structures.InvertedIndexInputStream",
+                "uk.ac.gla.terrier.structures.Index,java.lang.String,java.util.Iterator",
+                "index,structureName,lexicon-inputstream");
 		currentIndex.setIndexProperty("num.inverted.fields.bits", ""+FieldScore.FIELDS_COUNT );
 		
 		//3. document index
@@ -546,17 +548,13 @@
 		//4. close the map phase indices
 		for(Index i : sourceIndices)
 		{
-			String path = i.getPath();
-			String prefix = i.getPrefix();
 			i.close();
 		}
 		//5. finalise the lexicon
-		int numTerms;
-		currentIndex.setIndexProperty("num.Terms",""+ (numTerms = lexstream.getNumberOfTermsWritten()) );
+		currentIndex.setIndexProperty("num.Terms",""+ lexstream.getNumberOfTermsWritten() );
 		currentIndex.setIndexProperty("num.Tokens",""+lexstream.getNumberOfTokensWritten() );
 		currentIndex.setIndexProperty("num.Pointers",""+lexstream.getNumberOfPointersWritten() );
 		lexstream.close();
-		this.createLexicon(numTerms);
 		this.finishedInvertedIndexBuild();
 		currentIndex.flush();
 	}
@@ -573,7 +571,7 @@
 		try{
 			tempRM.setBos(new BitOutputStream(
 					currentIndex.getPath() + ApplicationSetup.FILE_SEPARATOR 
-					+ currentIndex.getPrefix() + ApplicationSetup.IFSUFFIX ));
+					+ currentIndex.getPrefix() + ".inverted.bf"));
 		} catch (IOException ioe) {
 			ioe.printStackTrace();
 		}
Index: src/uk/ac/gla/terrier/indexing/CreateDocumentInitialWeightIndex.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/CreateDocumentInitialWeightIndex.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/CreateDocumentInitialWeightIndex.java	(working copy)
@@ -25,7 +25,6 @@
  */
 package uk.ac.gla.terrier.indexing;
 import java.io.DataOutputStream;
-import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 
@@ -33,17 +32,17 @@
 
 import uk.ac.gla.terrier.matching.models.languagemodel.LanguageModel;
 import uk.ac.gla.terrier.structures.CollectionStatistics;
-import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.DirectIndex;
 import uk.ac.gla.terrier.structures.DocumentIndex;
+import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.InvertedIndex;
 import uk.ac.gla.terrier.structures.Lexicon;
 import uk.ac.gla.terrier.structures.indexing.DocumentInitialWeightIndex;
 import uk.ac.gla.terrier.structures.indexing.TermEstimateIndex;
+import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.Files;
 import uk.ac.gla.terrier.utility.Rounding;
 import uk.ac.gla.terrier.utility.TerrierTimer;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
 /**
  * This class creates the initial weight index of all
  * documents in the collection. This is done for 
@@ -63,7 +62,7 @@
 	protected InvertedIndex invIndex;
 	
 	/** The Lexicon for retrieval. */
-	protected Lexicon lexicon;
+	protected Lexicon<String> lexicon;
 	
 	/** The DirectIndex for retrieval. */
 	protected DirectIndex directIndex;
@@ -89,16 +88,9 @@
 	
 	/** The data structure of the term esitmates. */
 	protected TermEstimateIndex teIndex;
-	/**
-	 * The default constructor of CreateDocumentInitialWeightIndex.
-	 * @param modelName The name of the applied language model.
-	 */
-	public CreateDocumentInitialWeightIndex(String modelName) 
-	{
-		this(Index.createIndex(), modelName);
-	}
+
 
-	public CreateDocumentInitialWeightIndex(Index i, String modelName) {
+	public CreateDocumentInitialWeightIndex(Index i, String modelName) throws IOException {
 		long startLoading = System.currentTimeMillis();
 		docIndex = i.getDocumentIndex();
 		lexicon = i.getLexicon();
@@ -148,9 +140,9 @@
 		TerrierTimer timer1 = new TerrierTimer();
 		timer1.start();
 		double[] TF = new double[(int)numberOfUniqueTerms];
-		for (int i = 0; i < numberOfUniqueTerms; i++){
-			lexicon.findTerm(i);
-			TF[i] = (double)lexicon.getTF();
+		for (int i = 0; i < numberOfUniqueTerms; i++)
+		{
+			TF[i] = (double)lexicon.getLexiconEntry(i).getValue().getFrequency();
 		}
 		timer1.setBreakPoint();
 		if(logger.isDebugEnabled()) {
Index: src/uk/ac/gla/terrier/indexing/BasicSinglePassIndexer.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/BasicSinglePassIndexer.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/BasicSinglePassIndexer.java	(working copy)
@@ -32,14 +32,12 @@
 import java.util.LinkedList;
 import java.util.Queue;
 
+import uk.ac.gla.terrier.structures.BasicLexiconEntry;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconOutputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexiconOutputStream;
 import uk.ac.gla.terrier.structures.indexing.DocumentIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.DocumentPostingList;
-import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
 import uk.ac.gla.terrier.structures.indexing.singlepass.FieldPostingInRun;
 import uk.ac.gla.terrier.structures.indexing.singlepass.FieldsMemoryPostings;
 import uk.ac.gla.terrier.structures.indexing.singlepass.FileRunIteratorFactory;
@@ -228,15 +226,19 @@
 			try{
 				mp.finish(finishMemoryPosting());
 			}catch(Exception e){
-				e.printStackTrace();
+				logger.error("Problem creating index", e);
 			}
 			endCollection = System.currentTimeMillis();
 			long partialTime = (endCollection-startCollection)/1000;
 			logger.info("Collection #"+collectionNo+ " took "+partialTime+ " seconds to build the runs for "+numberOfDocuments+" documents\n");
 			logger.info("Merging "+fileNames.size()+" runs...");
 			startCollection = System.currentTimeMillis();
+			try{
 			performMultiWayMerge();
 			docIndexBuilder.finishedCollections();
+			} catch (Exception e) {
+				logger.error("Problem finishing index", e);
+			}
 			endCollection = System.currentTimeMillis();
 			logger.info("Collection #"+collectionNo+" took "+((endCollection-startCollection)/1000)+" seconds to merge\n ");
 			logger.info("Collection #"+collectionNo+" total time "+( (endCollection-startCollection)/1000+partialTime));
@@ -305,15 +307,15 @@
 	 * in a set of previously written runs.
 	 * The file names and the number of runs are given by the private queue
 	 */
-	public void performMultiWayMerge(){
+	public void performMultiWayMerge() throws IOException {
 		String[][] fileNames = getFileNames();
-		LexiconOutputStream lexStream = createLexiconOutputStream(path, prefix);
+		LexiconOutputStream<String> lexStream = new FSOMapFileLexiconOutputStream(this.currentIndex, "lexicon", BasicLexiconEntry.Factory.class);
 		try{
 			if (useFieldInformation)
 				createFieldRunMerger(fileNames);
 			else
 				createRunMerger(fileNames);
-			merger.beginMerge(fileNames.length, path + ApplicationSetup.FILE_SEPARATOR + prefix +  ApplicationSetup.IFSUFFIX);
+			merger.beginMerge(fileNames.length, path + ApplicationSetup.FILE_SEPARATOR + prefix +  ".inverted.bf");
 			while(!merger.isDone()){
 				merger.mergeOne(lexStream);
 			}
@@ -330,17 +332,16 @@
 			currentIndex.setIndexProperty("num.Terms", ""+numberOfUniqueTerms);
 			currentIndex.setIndexProperty("num.Pointers", ""+numberOfPointers);
 			currentIndex.setIndexProperty("num.Tokens", ""+numberOfTokens);
-			createLexicon(numberOfUniqueTerms);
 			currentIndex.addIndexStructure(
 					"inverted",
 					invertedIndexClass,
-					"uk.ac.gla.terrier.structures.Lexicon,java.lang.String,java.lang.String",
-					"lexicon,path,prefix");
+					"uk.ac.gla.terrier.structures.Index,java.lang.String", 
+					"index,structureName");
 			currentIndex.addIndexStructureInputStream(
                     "inverted",
-                    invertedIndexInputStreamClass,
-                    "java.lang.String,java.lang.String,uk.ac.gla.terrier.structures.LexiconInputStream",
-                    "path,prefix,lexicon-inputstream");
+                    "uk.ac.gla.terrier.structures.InvertedIndexInputStream",
+                    "uk.ac.gla.terrier.structures.Index,java.lang.String,java.util.Iterator",
+                    "index,structureName,lexicon-inputstream");
 			currentIndex.setIndexProperty("num.inverted.fields.bits", ""+FieldScore.FIELDS_COUNT );
 		}catch(Exception e){
 			logger.error("Problem in performMultiWayMerge", e);
@@ -359,36 +360,6 @@
 		return files;
 	}
 
-	/**
-	 * Hook method that creates the right LexiconBuilder instance
-	 * @throws IOException
-	 */
-	protected void createLexicon(int numberOfEntries) throws IOException{
-		final LexiconInputStream lis = createLexiconInputStream(path, prefix);
-		LexiconBuilder.createLexiconIndex(lis, numberOfEntries, lis.getEntrySize(), path, prefix );
-		currentIndex.addIndexStructure(
-				"lexicon",
-				UTFIndexing ? "uk.ac.gla.terrier.structures.UTFLexicon" :"uk.ac.gla.terrier.structures.Lexicon" );
-		currentIndex.addIndexStructureInputStream(
-				"lexicon",
-				UTFIndexing ? "uk.ac.gla.terrier.structures.UTFLexiconInputStream" :"uk.ac.gla.terrier.structures.LexiconInputStream");
-	}
-
-	/**
-	 * Hook method that creates the rigth LexiconOutputStream instance.
- 	 * @param name filename for the lexicon file.
-	 */
-	protected LexiconOutputStream createLexiconOutputStream(String path, String prefix){
-		return UTFIndexing ? new UTFLexiconOutputStream(path, prefix) : new LexiconOutputStream(path, prefix);
-	}
-
-	/**
-	 * Hook method that creates the rigth LexiconOutputStream instance.
- 	 * @param name filename for the lexicon file.
-	 */
-	protected LexiconInputStream createLexiconInputStream(String path, String prefix){
-		return UTFIndexing ? new UTFLexiconInputStream(path, prefix) : new LexiconInputStream(path, prefix);
-	}
 
 	/**
 	 * Hook method that creates a FieldRunMerger instance
Index: src/uk/ac/gla/terrier/indexing/Indexer.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/Indexer.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/Indexer.java	(working copy)
@@ -33,17 +33,17 @@
 import org.apache.log4j.Logger;
 
 import uk.ac.gla.terrier.structures.Index;
+import uk.ac.gla.terrier.structures.IndexUtil;
 import uk.ac.gla.terrier.structures.indexing.DirectIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.DocumentIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.InvertedIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
 import uk.ac.gla.terrier.structures.merging.BlockStructureMerger;
 import uk.ac.gla.terrier.structures.merging.StructureMerger;
-import uk.ac.gla.terrier.terms.TermPipeline;
 import uk.ac.gla.terrier.terms.SkipTermPipeline;
+import uk.ac.gla.terrier.terms.TermPipeline;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
-import uk.ac.gla.terrier.utility.Files;
 /**
  * <B>Properties:</b>
  * <ul>
@@ -63,15 +63,6 @@
 	/** the logger for this class */
 	protected static Logger logger = Logger.getRootLogger();
 
-	protected static String[] indexFileSuffices = new String[]{
-					ApplicationSetup.PROPERTIES_SUFFIX,
-					ApplicationSetup.IFSUFFIX,
-					ApplicationSetup.DF_SUFFIX,
-					ApplicationSetup.LEXICON_INDEX_SUFFIX,
-					ApplicationSetup.LEXICONSUFFIX,
-					ApplicationSetup.DOC_INDEX_SUFFIX,
-					ApplicationSetup.LEXICON_HASH_SUFFIX};
-
 	protected boolean UTFIndexing = false;
 
 	/**
@@ -322,11 +313,10 @@
 		}
 		else
 		{
-			final String src = path + ApplicationSetup.FILE_SEPARATOR + prefix;
-			final String dest = path + ApplicationSetup.FILE_SEPARATOR + oldIndexPrefix;
-			for (String suffix: indexFileSuffices)
-			{
-				Files.rename(src+suffix, dest+suffix);
+			try{
+				IndexUtil.renameIndex(path, prefix, path, oldIndexPrefix);
+			} catch (IOException ioe ) {
+				logger.error("Could not rename index", ioe);
 			}
 		}
 		//restore the prefix
@@ -374,18 +364,13 @@
 										  
 		sMerger.setNumberOfBits(FieldScore.FIELDS_COUNT);
 		sMerger.mergeStructures();
-		
-		String separator = ApplicationSetup.FILE_SEPARATOR;
 		src1.close(); src2.close(); dst.close();
 		//delete old indices  
-		for(String suffix : indexFileSuffices)
-		{
-			Files.delete(index1[0]+separator+index1[1]+ suffix);
-		}
-
-		for(String suffix : indexFileSuffices)
-        {
-            Files.delete(index2[0]+separator+index2[1]+ suffix);
+		try{
+			IndexUtil.deleteIndex(index1[0], index1[1]);
+			IndexUtil.deleteIndex(index2[0], index2[1]);
+		} catch (IOException ioe) {
+			logger.warn("Could not delete merge input indices ", ioe);
         }
 	}
 
@@ -414,11 +399,10 @@
 		logger.info("Done merging");
 		
 		//rename the generated structures 
-		String src = mpath + ApplicationSetup.FILE_SEPARATOR + mprefix+"_"+ (counterMerged-1);
-		String dest = mpath + ApplicationSetup.FILE_SEPARATOR + mprefix;
-		for (String suffix: indexFileSuffices)
-		{
-			Files.rename(src+suffix, dest+suffix);
+		try{
+			IndexUtil.renameIndex(mpath, mprefix+"_"+ (counterMerged-1), mpath, mprefix);
+		} catch (IOException ioe) {
+			logger.error("Could not rename merged index", ioe);
 		}
 	}
 
Index: src/uk/ac/gla/terrier/indexing/CreateTermEstimateIndex.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/CreateTermEstimateIndex.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/CreateTermEstimateIndex.java	(working copy)
@@ -25,8 +25,9 @@
  */
 package uk.ac.gla.terrier.indexing;
 import java.io.DataOutputStream;
-import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
 
 import org.apache.log4j.Logger;
 
@@ -36,11 +37,11 @@
 import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.InvertedIndex;
 import uk.ac.gla.terrier.structures.Lexicon;
-import uk.ac.gla.terrier.structures.indexing.TermEstimateIndex;
+import uk.ac.gla.terrier.structures.LexiconEntry;
+import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.Files;
 import uk.ac.gla.terrier.utility.Rounding;
 import uk.ac.gla.terrier.utility.TerrierTimer;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
 /**
  * This class creates the term estimate index of all terms in vocabulary. This is
  * done for language modeling approach.
@@ -121,9 +122,10 @@
 	 * Create the TermEstimateIndex. It computes the average term generation probability for each term in the vocabulary of the collection.
 	 *
 	 */
+	@SuppressWarnings("unchecked")
 	public void createTermEstimateIndex(){
 		TerrierTimer timer = null;
-		long numberOfUniqueTerms = collectionStatistics.getNumberOfUniqueTerms();
+		int numberOfUniqueTerms = collectionStatistics.getNumberOfUniqueTerms();
 		if(logger.isInfoEnabled()){
 		logger.info("number of unique terms: " + numberOfUniqueTerms);
 		logger.info("Creating TermEstimateIndex...");
@@ -133,10 +135,15 @@
 			timer.setTotalNumber((double)numberOfUniqueTerms);
 			timer.start();
 		}
-		termEstimates = new double[(int)numberOfUniqueTerms];
-		for (int i = 0; i < numberOfUniqueTerms; i++){
-			lexicon.seekEntry(i);
-			int[][] pointers = invIndex.getDocuments(i);
+		termEstimates = new double[numberOfUniqueTerms];
+		
+		Iterator<Map.Entry<String,LexiconEntry>> lexiconStream = 
+			(Iterator<Map.Entry<String,LexiconEntry>>)index.getIndexStructureInputStream("lexicon");
+		int i=0;
+		while(lexiconStream.hasNext())
+		{
+			Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
+			int[][] pointers = invIndex.getDocuments(lee.getValue());
 			int[] docids = pointers[0];
 			int[] tf = pointers[1];
 			double[] docLength = new double[tf.length];
@@ -148,17 +155,18 @@
 			if(logger.isDebugEnabled()){
 				if ((i+1) % 10000 == 0){
 					timer.setRemainingTime((i+1));
-					logger.debug("term: " + lexicon.getTerm() +
-							", TF: " + lexicon.getTF() +", " +
+					logger.debug("term: " + lee.getKey() +
+							", TF: " + lee.getValue().getFrequency() +", " +
 							Rounding.toString((double)(i+1)/numberOfUniqueTerms*100, 2) +
 							"% finished, time remaining: " + timer.toStringMinutesSeconds());
 				}
 			}
+			i++;
 		}
 		try{
 			DataOutputStream output = new DataOutputStream(
 					Files.writeFileStream(INDEX_FILENAME));
-			for (int i = 0; i < termEstimates.length; i++)
+			for (i = 0; i < termEstimates.length; i++)
 				output.writeDouble(termEstimates[i]);
 			output.close();
 		}
Index: src/uk/ac/gla/terrier/indexing/BasicIndexer.java
===================================================================
--- src/uk/ac/gla/terrier/indexing/BasicIndexer.java	(revision 2526)
+++ src/uk/ac/gla/terrier/indexing/BasicIndexer.java	(working copy)
@@ -35,10 +35,7 @@
 import uk.ac.gla.terrier.structures.indexing.DocumentPostingList;
 import uk.ac.gla.terrier.structures.indexing.InvertedIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
-import uk.ac.gla.terrier.structures.indexing.UTFInvertedIndexBuilder;
-import uk.ac.gla.terrier.structures.indexing.UTFLexiconBuilder;
 import uk.ac.gla.terrier.terms.TermPipeline;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
 import uk.ac.gla.terrier.utility.TermCodes;
 /** 
@@ -171,15 +168,7 @@
 	public void createDirectIndex(Collection[] collections)
 	{
 		currentIndex = Index.createNewIndex(path, prefix);
-		if (UTFIndexing)
-		{
-			lexiconBuilder = new UTFLexiconBuilder(currentIndex);
-		}
-		else
-		{
-			lexiconBuilder = new LexiconBuilder(currentIndex);
-		}
-		
+		lexiconBuilder = new LexiconBuilder(currentIndex, "lexicon");
 		directIndexBuilder = new DirectIndexBuilder(currentIndex);
 		docIndexBuilder = new DocumentIndexBuilder(currentIndex);
 				
@@ -372,16 +361,8 @@
 
 
 		//generate the inverted index
-		if (UTFIndexing)
-		{
-			logger.info("Started building the UTF inverted index...");
-			invertedIndexBuilder = new UTFInvertedIndexBuilder(currentIndex);
-		}
-		else
-		{
 			logger.info("Started building the inverted index...");
-			invertedIndexBuilder = new InvertedIndexBuilder(currentIndex);
-		}
+		invertedIndexBuilder = new InvertedIndexBuilder(currentIndex, "inverted");
 		
 		invertedIndexBuilder.createInvertedIndex();
 		finishedInvertedIndexBuild();
@@ -421,13 +402,6 @@
 	/** Hook method, called when the inverted index is finished - ie the lexicon is finished */
 	protected void finishedInvertedIndexBuild()
 	{
-		if (Boolean.parseBoolean(ApplicationSetup.getProperty("lexicon.use.hash","true"))) {
-			logger.debug("Building lexicon hash");
-			try{
-				LexiconBuilder.createLexiconHash(currentIndex);
-			} catch (IOException ioe) {
-				logger.warn("Problem creating (optional) Lexicon Hash", ioe);
-			}
-		}
+		LexiconBuilder.optimise(currentIndex, "lexicon");
 	}
 }
Index: src/uk/ac/gla/terrier/structures/MapLexicon.java
===================================================================
--- src/uk/ac/gla/terrier/structures/MapLexicon.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/MapLexicon.java	(revision 0)
@@ -0,0 +1,95 @@
+package uk.ac.gla.terrier.structures;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.io.Text;
+import uk.ac.gla.terrier.structures.seralization.WriteableFactory;
+
+import uk.ac.gla.terrier.structures.maps.OrderedMap;
+
+/** Implementation of a lexicon. This class should be subclassed by
+ * any lexicon implementation which uses a java.util.Map for storing
+ * entries.
+ * @author Craig Macdonald
+ * @since 3.0
+ */
+public abstract class MapLexicon extends Lexicon<String> implements Closeable
+{
+	protected WriteableFactory<Text> keyFactory = null; // builds the Text key in getLexiconEntry(String); null here, so it must be assigned before lookups (presumably by a subclass - confirm)
+    protected interface Id2EntryIndexLookup // translates a term id into an entry index
+    {
+        int getIndex(int termid) throws IOException; // entry index for the given term id
+    }
+    /** Default lookup: the term id is used directly as the entry index. */
+    protected static class IdIsIndex implements Id2EntryIndexLookup
+    {
+        public int getIndex(int termid)
+        {
+            return termid;
+        }
+    }
+    /** Backing map holding the lexicon entries, keyed by term. */
+    final Map<Text,LexiconEntry> map;
+    Id2EntryIndexLookup idlookup; // used by getLexiconEntry(int) to turn a term id into an entry index
+    /** Creates a lexicon over backingMap, using term ids directly as entry indices. */
+    public MapLexicon(Map<Text,LexiconEntry> backingMap)
+    {
+        this.map = backingMap;
+        this.idlookup = new IdIsIndex();
+    }
+    /** Creates a lexicon over backingMap with a caller-supplied term id to entry index lookup. */
+    public MapLexicon(Map<Text,LexiconEntry> backingMap,
+        Id2EntryIndexLookup idlookupobject)
+    {
+        this.map = backingMap;
+        this.idlookup = idlookupobject;
+    }
+    /** Replaces the term id to entry index lookup. */
+    protected void setTermIdLookup(Id2EntryIndexLookup idlookupobject)
+    {
+        this.idlookup = idlookupobject;
+    }
+    /** Returns the result of map.get for a Text key built from term via keyFactory. */
+    public LexiconEntry getLexiconEntry(String term)
+    {
+    	Text key = keyFactory.newInstance();
+    	key.set(term);
+        return map.get(key);
+    }
+    /** Returns the index-th entry; throws UnsupportedOperationException unless the backing map is an OrderedMap. */
+    public Map.Entry<String,LexiconEntry> getIthLexiconEntry(int index) 
+    {
+        if (! (map instanceof OrderedMap))
+            throw new UnsupportedOperationException();
+        return toStringEntry(((OrderedMap<Text, LexiconEntry>)map).get(index));
+    }
+    /** Returns the entry for termid, resolved through idlookup; returns null if the lookup throws IOException. */
+    public Map.Entry<String,LexiconEntry> getLexiconEntry(int termid)
+    {
+    	int id;
+    	try{
+    		id = idlookup.getIndex(termid);
+    	} catch (IOException ioe) {
+    		return null;
+    	}
+        return getIthLexiconEntry(id);
+    }
+    /** Returns the size of the backing map. */
+    public int numberOfEntries()
+    {
+    	return this.map.size();
+    }
+    /** Converts a Text-keyed map entry into a String-keyed LexiconFileEntry. */
+    static Map.Entry<String,LexiconEntry> toStringEntry (Map.Entry<Text,LexiconEntry> a)
+    {
+    	return new LexiconFileEntry<String>(a.getKey().toString(), a.getValue());
+    }
+    /** Closes the backing map and the id lookup, for whichever of them implements Closeable. */
+    public void close()
+    {
+        if (map instanceof Closeable)
+            ((Closeable)map).close();
+        if (idlookup instanceof Closeable)
+            ((Closeable)idlookup).close();
+    }
+}
Index: src/uk/ac/gla/terrier/structures/LexiconEntry.java
===================================================================
--- src/uk/ac/gla/terrier/structures/LexiconEntry.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/LexiconEntry.java	(working copy)
@@ -1,86 +1,14 @@
-
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is BlockDirectIndex.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> (original author)
- *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
- */
 package uk.ac.gla.terrier.structures;
+import org.apache.hadoop.io.Writable;
 
-/** Contains all the information about one entry in the Lexicon. 
-  * Created to make thread-safe lookups in the Lexicon easier. */
-public class LexiconEntry {
-
-	/** Create an empty LexiconEntry */
-	public LexiconEntry(){}
-
-	/** Create a lexicon entry with the following information.
-	  * @param t the term 
-	  * @param tid the term id
-	  * @param n_t the number of documents the term occurs in (document frequency)
-	  * @param TF the total count of therm t in the collection
-	  */
-	public LexiconEntry(String t, int tid, int n_t, int TF)
+public abstract class LexiconEntry implements EntryStatistics, BitIndexPointer, Writable
 	{
-		this.term =t;
-		this.termId = tid;
-		this.n_t = n_t;
-		this.TF = TF;
-	}
 
-	/** increment this lexicon entry by another */
-	public void add(LexiconEntry le)
+    public String toString() // formats as "(df,tf)@" + getBytes() + "," + getBits()
 	{
-		this.n_t += le.n_t;
-		this.TF  += le.TF;
+        return "("+getDocumentFrequency()+","+getFrequency()+')' // "(" must be a String: a leading char + number is integer addition, not concatenation
+            +'@'+getBytes() + ',' + getBits();
 	}
 
-	/** alter this lexicon entry to subtract another lexicon entry */
-	public void subtract(LexiconEntry le)
-	{
-		this.n_t -= le.n_t;
-		this.TF  -= le.TF;
-	}
-
-	/** the term of this entry */	
-	public String term;
-	/** the termid of this entry */
-	public int termId;
-	/** the number of document that this entry occurs in */
-	public int n_t;
-	/** the total number of occurrences of the term in the index */
-	public int TF;
-	/** the start offset of the entry in the inverted index */
-	public long startOffset;
-	/** the start bit offset of the entry in the inverted index */
-	public byte startBitOffset;
-	/** the end offset of the entry in the inverted index */
-	public long endOffset;
-	/** the end bit offset of the entry in the inverted index */
-	public byte endBitOffset;
-
-	/** returns a string representation of this lexicon entry */	
-	public String toString() {
-		return term + " " + termId + " " + n_t + " " + TF + " " + startOffset + " " + startBitOffset + " " + endOffset + " " + endBitOffset;
-	}
+    public abstract void setTermId(int newTermId);
 }
Index: src/uk/ac/gla/terrier/structures/LexiconInputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/LexiconInputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/LexiconInputStream.java	(working copy)
@@ -1,340 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is LexiconInputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> (original author) 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.EOFException;
-import java.io.File;
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.Files;
-/**
- * This class implements an input stream for the lexicon structure.
- * @author Vassilis Plachouras
- * @version $Revision: 1.36 $
- */
-public class LexiconInputStream implements Iterable<String>, Closeable{
-	/** The logger used for the Lexicon */
-	protected Logger logger = Logger.getRootLogger();
-
-	/** The term represented as an array of bytes.*/
-	protected byte[] termCharacters =
-		new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-	/** The term represented as a string.*/
-	protected String term;
-	/** An integer representing the id of the term.*/
-	protected int termId;
-	/** The document frequency of the term.*/
-	protected int documentFrequency;
-	/** The term frequency of the term.*/
-	protected int termFrequency;
-	/** The offset in bytes in the inverted file of the term.*/
-	protected long endOffset;
-	/** The starting offset in bytes in the inverted file of the term.*/
-	protected long startOffset;
-	/** The starting bit offset in the inverted file of the term.*/
-	protected byte startBitOffset;
-	/** 
-	 * The offset in bits in the starting byte in the inverted file.
-	 * Its initial value is -1 so that when we do startBitOffset = 
-	 * endBitOffset +1, the first startBitOffset is 0
-	 */
-	protected byte endBitOffset = -1;
-	/** A data input stream to read from the bufferInput.*/
-	protected DataInput lexiconStream = null;
-	/** The length of the lexicon file. */
-	protected long lexiconFilelength;
-	/** size of one entry of the lexicon */	
-	protected int entrySize = 0;
-
-	/** number of pointers read so far */
-	protected long numPointersRead = 0;
-	/** number of tokens read so far */
-    protected long numTokensRead = 0;
-	/** number of terms read so far */
-	protected int numTermsRead = 0;
-
-	 /** A constructor for child classes that doesnt open the file */
-	protected LexiconInputStream(long a, long b, long c) { }
-
-	/**
-	 * A default constructor. Opens the default lexicon.
-	 */
-	public LexiconInputStream() {
-		this(ApplicationSetup.LEXICON_FILENAME);
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public LexiconInputStream(String filename) {
-		try {
-			lexiconStream = new DataInputStream(Files.openFileStream(filename));
-			this.lexiconFilelength = Files.length(filename);
-		} catch (IOException ioe) {
-			logger.fatal(
-				"I/O Exception occured while opening the lexicon file. Stack trace follows.",ioe);
-		}
-		entrySize = Lexicon.lexiconEntryLength;
-	}
-	
-	public LexiconInputStream(String path, String prefix) {
-		this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param file java.io.File the name of the lexicon file.
-	 */
-	public LexiconInputStream(File file) {
-		try {
-			lexiconStream = new DataInputStream(Files.openFileStream(file));
-			 this.lexiconFilelength = Files.length(file);
-		} catch (IOException ioe) {
-			logger.fatal(
-				"I/O Exception occured while opening the lexicon file. Stack trace follows.",ioe);
-		}
-		entrySize = Lexicon.lexiconEntryLength;
-	}
-	
-	/** Read a lexicon from the specified input stream */
-	public LexiconInputStream(DataInput in) {
-		lexiconStream = in;
-		this.lexiconFilelength = 0;
-		entrySize = Lexicon.lexiconEntryLength;
-	}
-	
-	/**
-	 * Closes the lexicon stream.
-	 * @throws IOException if an I/O error occurs
-	 */
-	public void close() {
-		try{
-			if (lexiconStream instanceof java.io.Closeable)
-				((java.io.Closeable)lexiconStream).close();
-		} catch (IOException ioe){}
-	}
-	
-	public int getEntrySize()
-	{
-		return entrySize;
-	}
-	
-	/**
-	 * Read the next lexicon entry.
-	 * @return the number of bytes read if there is no error, 
-	 *		 otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 */
-	public int readNextEntry() throws IOException {
-		try {
-			startBitOffset = (byte) (endBitOffset + 1);
-			startOffset = endOffset;
-			if (startBitOffset == 8) {
-				startOffset = endOffset + 1;
-				startBitOffset = 0;
-			}
-			lexiconStream.readFully(
-				termCharacters,
-				0,
-				ApplicationSetup.STRING_BYTE_LENGTH);
-			
-			termId = lexiconStream.readInt();
-			documentFrequency = lexiconStream.readInt();
-			termFrequency = lexiconStream.readInt();
-			endOffset = lexiconStream.readLong();
-			endBitOffset = lexiconStream.readByte();
-			numPointersRead += documentFrequency;
-			numTokensRead += termFrequency;
-			numTermsRead++;
-			return Lexicon.lexiconEntryLength;
-		} catch (EOFException eofe) {
-			return -1;
-		}
-	}
-	
-	/** This is an alias to readNextEntry(), except for implementations that 
-	  * cannot parse the string from the byte array. */
-	public int readNextEntryBytes() throws IOException {
-		return readNextEntry();
-	}
-	/**
-	* Returns the number of entries in the lexicon file.
-	*/
-	public int numberOfEntries(){
-		return (int)(lexiconFilelength / Lexicon.lexiconEntryLength);
-	}
-	
-	/**
-	 * Prints out the contents of the lexicon file to check.
-	 */
-	public void print() {
-		int i = 0; //counter
-		int entryLength = getEntrySize();
-		System.err.println("LexOffset, Term, Termid, DF, TF, OffsetBy, OffsetBit");
-		try {
-			while (readNextEntry() != -1) {
-				System.out.println(
-					""
-						+ ((long)i * (long)entryLength)
-						+ ", "
-						+ getTerm()
-						+ ", "
-						+ termId
-						+ ", "
-						+ documentFrequency
-						+ ", "
-						+ termFrequency
-						+ ", "
-						+ endOffset
-						+ ", "
-						+ endBitOffset);
-				i++;
-			}
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while reading the document index " +
-				"input stream. Stack trace follows.",ioe);
-		}
-	}
-
-	/** Returns the number of pointers there would be in an inverted index built using this lexicon (thus far).
-	  * This is equal to the sum of the Nts written to this lexicon output stream. */
-	public long getNumberOfPointersRead()
-	{
-		return numPointersRead;
-	}
-
-	/** Returns the number of tokens there are in the entire collection represented by this lexicon (thus far).
-	  * This is equal to the sum of the TFs written to this lexicon output stream. */
-	public long getNumberOfTokensRead()
-	{
-		return numTokensRead;
-	}
-
-	/** Returns the number of terms written so far by this LexiconInputStream */
-	public int getNumberOfTermsRead()
-	{
-		return numTermsRead;
-	}
-
-
-	/**
-	 * Returns the bit offset in the last byte of 
-	 * the term's entry in the inverted file.
-	 * @return byte the bit offset in the last byte of 
-	 *		 the term's entry in the inverted file
-	 */
-	public byte getEndBitOffset() {
-		return endBitOffset;
-	}
-	/**
-	 * Returns the ending offset of the term's 
-	 * entry in the inverted file.
-	 * @return long The ending byte of the term's 
-	 *			  entry in the inverted file.
-	 */
-	public long getEndOffset() {
-		return endOffset;
-	}
-	/**
-	 * Returns the bit offset in the first byte 
-	 * of the term's entry in the inverted file.
-	 * @return byte the bit offset in the first byte 
-	 *		 of the term's entry in the inverted file
-	 */
-	public byte getStartBitOffset() {
-		return startBitOffset;
-	}
-	/**
-	 * Returns the starting offset of the term's 
-	 * entry in the inverted file.
-	 * @return long The starting byte of the term's entry 
-	 * 				in the inverted file.
-	 */
-	public long getStartOffset() {
-		return startOffset;
-	}
-	/**
-	 * Return the document frequency for the given term.
-	 * @return int The document frequency for the given term
-	 */
-	public int getNt() {
-		return documentFrequency;
-	}
-	/**
-	 * Returns the string representation of the term.
-	 * @return the string representation of the already found term.
-	 */
-	public String getTerm() {
-		return (new String(termCharacters)).trim();
-	}
-	/**
-	 * Returns the term's id.
-	 * @return the term's id.
-	 */
-	public int getTermId() {
-		return termId;
-	}
-	/**
-	 * Returns the term frequency for the already seeked term.
-	 * @return the term frequency in the collection.
-	 */
-	public int getTF() {
-		return termFrequency;
-	}
-	/** 
-	 * Returns the bytes of the String.
-	 * @return the byte array holding the term's byte representation
-	 */
-	public byte[] getTermCharacters() {
-		return termCharacters;
-	}
-
-	/** Returns an Interator of Strings of each term in this lexicon */
-	public Iterator<String> iterator()
-	{
-		return new Iterator<String>(){
-			public boolean hasNext(){
-				try{
-					return readNextEntry() != -1;
-				} catch (IOException ioe) {
-					logger.error(ioe);
-					return false;
-				}
-			}
-			public String next()
-			{
-				return getTerm();
-			}
-			public void remove() { throw new UnsupportedOperationException();}
-
-		};
-	}
-}
Index: src/uk/ac/gla/terrier/structures/FSOMapFileLexiconOutputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/FSOMapFileLexiconOutputStream.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/FSOMapFileLexiconOutputStream.java	(revision 0)
@@ -0,0 +1,101 @@
+package uk.ac.gla.terrier.structures;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+
+import uk.ac.gla.terrier.structures.maps.FSOrderedMapFile;
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
+/** A LexiconOutputStream for FSOMapFileLexicon. Writes to a FSOrderedMapFile.
+ * @author craigm
+ * @since 3.0
+ */
+public class FSOMapFileLexiconOutputStream extends LexiconOutputStream<String>
+{
+	protected FixedSizeWriteableFactory<Text> keyFactory; // factory for the fixed-size Text keys
+	protected Text tempKey = null; // single key instance reused by every writeNextEntry call
+	protected final FSOrderedMapFile.MapFileWriter mapFileWriter; // writer receiving the key/entry pairs
+	protected Index index = null; // index to register the lexicon with on close(); null means no registration
+	protected String leValueClassname = null; // class name registered as the "-valuefactory" structure
+	protected final String structureName; // name of the lexicon structure; null when writing to a bare filename
+	/** Writes to the given filename; nothing is registered with an Index on close(). */
+	public FSOMapFileLexiconOutputStream(String filename, FixedSizeWriteableFactory<Text> _keyFactory) throws IOException
+	{
+		mapFileWriter = FSOrderedMapFile.mapFileWrite(filename);
+		structureName = null;
+		leValueClassname = null;
+		index = null;
+		keyFactory = _keyFactory;
+		tempKey = keyFactory.newInstance();
+	}
+	/** Writes to the file named by FSOMapFileLexicon.constructFilename for this structure name, path and prefix. */
+	@SuppressWarnings("unchecked")
+	public FSOMapFileLexiconOutputStream(String path, String prefix, String _structureName, 
+			FixedSizeWriteableFactory<Text> _keyFactory) throws IOException
+	{
+		super();
+		this.structureName = _structureName;
+		mapFileWriter = FSOrderedMapFile.mapFileWrite(FSOMapFileLexicon.constructFilename(structureName, path, prefix, FSOMapFileLexicon.MAPFILE_EXT));
+		keyFactory = _keyFactory;
+		tempKey = keyFactory.newInstance();
+	}
+	/** Registers a FixedSizeTextFactory as the structure's "-keyfactory" on the index, flushes, and returns the structure the index hands back. */
+	@SuppressWarnings("unchecked")
+	static FixedSizeWriteableFactory<Text> getKeyFactory(Index _index, String _structureName)
+	{
+		_index.addIndexStructure(_structureName+"-keyfactory", 
+				uk.ac.gla.terrier.structures.seralization.FixedSizeTextFactory.class.getName(),
+				"java.lang.String", "${max.term.length}");
+		_index.flush();
+		return (FixedSizeWriteableFactory<Text>)_index.getIndexStructure(_structureName+"-keyfactory");
+	}
+	/** Writes the named lexicon structure of _index; close() then registers the lexicon on that index. */
+	public FSOMapFileLexiconOutputStream(Index _index, String _structureName, 
+			Class<? extends FixedSizeWriteableFactory<LexiconEntry>>valueFactoryClass) throws IOException
+	{
+		this(_index.getPath(), _index.getPrefix(), _structureName, getKeyFactory(_index, _structureName));
+		this.index = _index;
+		leValueClassname = valueFactoryClass.getName();
+	}
+
+	/** Writes one term and its entry; the returned size covers only the key (see TODO below). */
+	@Override
+	public int writeNextEntry(String _key, LexiconEntry _value) throws IOException {
+		tempKey.set(_key);
+		mapFileWriter.write(tempKey, _value);
+		super.incrementCounters(_value);
+		return keyFactory.getSize() /* + TODO */;
+	}
+	/** Closes the map file writer, then registers the lexicon if an Index was supplied. */
+	@Override
+	public void close()
+	{
+		try{
+			mapFileWriter.close();
+		} catch (IOException ioe) {} // NOTE(review): a failed close is silently ignored, yet the lexicon is still registered below - confirm this is intended
+		
+		if (index != null)
+		{
+			addLexiconToIndex(index, this.structureName, this.leValueClassname);
+		}
+	}
+	/** Adds the value factory, the FSOMapFileLexicon structure and its input stream (MapFileLexiconIterator) to the index. */
+	public static void addLexiconToIndex(Index index, String structureName, String leValueClassname)
+	{
+		index.addIndexStructure(
+				structureName+"-valuefactory",
+				leValueClassname,
+				"", "");
+		index.addIndexStructure(
+				structureName, 
+				"uk.ac.gla.terrier.structures.FSOMapFileLexicon",
+				"java.lang.String,uk.ac.gla.terrier.structures.Index",
+				"structureName,index");
+		index.addIndexStructureInputStream(
+				structureName, 
+				"uk.ac.gla.terrier.structures.FSOMapFileLexicon$MapFileLexiconIterator",
+				"java.lang.String,uk.ac.gla.terrier.structures.Index",
+				"structureName,index");
+	}
+
+}
Index: src/uk/ac/gla/terrier/structures/upgrading/ReverseFile.java
===================================================================
--- src/uk/ac/gla/terrier/structures/upgrading/ReverseFile.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/upgrading/ReverseFile.java	(working copy)
@@ -1,275 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is ReverseFile.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *  Roi Blanco
- */
-
-package uk.ac.gla.terrier.structures.upgrading;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-import uk.ac.gla.terrier.compression.BitFile;
-import uk.ac.gla.terrier.compression.OldBitFile;
-import uk.ac.gla.terrier.structures.BlockInvertedIndex;
-import uk.ac.gla.terrier.structures.InvertedIndex;
-import uk.ac.gla.terrier.structures.Lexicon;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.FieldScore;
-import uk.ac.gla.terrier.utility.Files;
-/**
- * This class provides some utilities for handling inverted files from the old terrier version.
- * Concretely:<br>
- * - Reverses an Inverted File byte to byte (the byte sequence remains the same)
- * abcdefgh| ... --> hgfedcba | ... <br>
- * - Converts an inverted file from the old terrier version (using OldBitFile) to this one (reading the contents of the 
- * file and writting the postings in the new format). <br>
- * - Checks if two inverted files have the same information (this is, the same number of posting lists, with the same 
- * data in them).
- * 
- * @author Roi Blanco
- *
- */
-public class ReverseFile {
-	
-	
-	/**
-	 * Traverses an inverted file written with the old terrier version, and writes it in the new compressed format. 
-	 * @param output String containing the name of the new inverted file (should replace the old one).
-	 */
-	// TODO Support for blocks
-	public static void reverse(String output){
-		BitFile newFile = new BitFile(output, "rw");
-		Lexicon lexicon = new Lexicon();
-		OldBitFile file = new OldBitFile(ApplicationSetup.INVERTED_FILENAME, "rw");
-		try{
-			newFile.writeReset();
-			for (int currentTerm = 0; currentTerm < lexicon.getNumberOfLexiconEntries(); currentTerm++) {
-				lexicon.seekEntry(currentTerm);			
-				byte startBitOffset = lexicon.getStartBitOffset();
-				long startOffset = lexicon.getStartOffset();
-				byte endBitOffset = lexicon.getEndBitOffset();
-				long endOffset = lexicon.getEndOffset();					
-				final int fieldCount = FieldScore.FIELDS_COUNT;
-				final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
-				int df = lexicon.getNt();			
-				file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);		
-				if (loadTagInformation) { 
-					for(int i = 0; i < df ; i++){
-						newFile.writeGamma(file.readGamma());				
-						newFile.writeUnary(file.readUnary());
-						newFile.writeBinary(file.readBinary(fieldCount), fieldCount);
-					}
-				} else { 		
-					for(int i = 0; i < df; i++){							
-						newFile.writeGamma(file.readGamma());
-						newFile.writeUnary(file.readUnary());													
-					}
-				}													
-			}			
-			newFile.close();
-			file.close();
-			lexicon.close();
-		}catch(IOException e){
-			System.err.println("Error writting the compressed file ");
-			e.printStackTrace();
-		}
-	}
-	
-	/**
-	 * Checks if two inverted files contain the same information, without considering the termid or the
-	 * order of the terms in the files.
-	 * @param  invertedFile String file name of the inverted file
-	 * @param  lexiconFile String file name of the lexicon file
-	 */
-	public static void checkIF(String invertedFile, String lexiconFile){
-		Lexicon lexicon = new Lexicon(); 
-		Lexicon lexicon2 = new Lexicon(lexiconFile);
-		InvertedIndex index1;
-		InvertedIndex index2;
-		if (ApplicationSetup.BLOCK_INDEXING) {
-			System.err.println("block html inverted index");
-			index1 = new BlockInvertedIndex(lexicon);
-			index2 = new BlockInvertedIndex(lexicon2);
-		} else {
-			System.err.println("html inverted index");
-			index1 = new InvertedIndex(lexicon);
-			index2 = new InvertedIndex(lexicon2);
-		}
-		
-		if(lexicon.getNumberOfLexiconEntries() != lexicon2.getNumberOfLexiconEntries()){
-			System.err.println("The number of entries of the lexicons ("+lexicon.getNumberOfLexiconEntries()+","+lexicon2.getNumberOfLexiconEntries()+") does not match ");
-			System.exit(0);
-		}
-		String term;
-		long entries = lexicon.getNumberOfLexiconEntries();
-		long check = entries / 10;
-		
-		for(int i = 0; i < entries; i++){
-			if(i%check == 0){
-				System.out.print((i/check)*10+"% ...");
-			}
-			lexicon.seekEntry(i);
-			term = lexicon.getTerm();
-			if(!lexicon2.findTerm(term)){
-				System.err.println("\nTerm "+term+" not found!");
-				System.exit(0);
-			}
-			//get the data from the two inverted files and check it
-			if(lexicon.getNt() != lexicon2.getNt()){
-				System.err.println("\nDifferent df for term "+term+"("+lexicon.getNt()+","+lexicon2.getNt()+")");
-				System.exit(0);
-			}
-			if(lexicon.getTF() != lexicon2.getTF()){
-				System.err.println("\nDifferent df for term "+term+"("+lexicon.getTF()+","+lexicon2.getTF()+")");
-				System.exit(0);
-			}
-		
-			if(!checkArrays(index1.getDocuments(lexicon.getTermId()),index2.getDocuments(lexicon2.getTermId()))){
-				System.err.println("\nPostings for term "+term+" do not match:\n"+index1.getInfo(lexicon.getTermId())+"\n"+index2.getInfo(lexicon2.getTermId()));
-				System.exit(0);
-			}
-		}
-		
-		System.out.println("\nThe inverted files match");				
-	}
-	
-	/**
-	 * Checks whether two int[][] arrays have the same elements 
-	 * @param one the first int[][] array
-	 * @param two the second int[][] array
-	 * @return true if the arrays match
-	 */
-	public static boolean checkArrays(int[][] one, int[][] two){
-		for(int i = 0; i < one.length; i++)
-			for(int j = 0; j < one[i].length; j++){
-				if(one[i][j] != two[i][j])
-					return false;
-			}
-		return true;
-	}
-	
-	/** 
-	 * Reverses a file reading byte to byte
-	 * @param input String with the file to read 
-	 * @param output String output filenae
-	 */
-	public static void reverseByteByByte(String input, String output){
-		// Open the input file for reading
-		DataInputStream dis = null;
-		DataOutputStream dos = null; 
-		try {
-			dis = new DataInputStream(Files.openFileStream(input));
-		} catch (IOException e) {
-			System.err.println("Error while opening "+input+" stack trace follows");
-			e.printStackTrace();
-		}
-		try {
-			dos = new DataOutputStream(Files.writeFileStream(output));
-		} catch (IOException e) {			
-			System.err.println("Error while opening "+output+" stack trace follows");
-			e.printStackTrace();
-		}
-		int byteRead;
-		int i = 0;
-		try{
-			while((byteRead = dis.read())!=-1){
-				i++;
-				dos.writeByte(reverseByte((byteRead)));								
-			}
-		}catch(Exception e){
-			System.err.println("An error ocurred while writing, stack trace follows :");
-			e.printStackTrace();
-		}
-		try{
-			System.out.println("Bytes written "+i);
-			dis.close();
-			dos.close();
-		}catch(Exception e){
-			e.printStackTrace();
-		}
-	}
-	
-	/**
-	 * Reverses one byte.
-	 * @param byteIn byte to reverse.
-	 * @return byte reversed.
-	 */
-	public static int reverseByte(int byteIn){
-		int byteOut = 0;
-		for(int i = 0; i < 8; i++ ){
-			// shift right with 0s and mask
-			byteOut = byteOut >> 1;
-			byteOut |= byteIn & 128;			
-			byteIn = byteIn << 1;
-		}		
-		return ~ byteOut;
-	}
-	
-	/**
-	 * Main class
-	 * @param args (see help)
-	 */
-	public static void main(String args[]){
-		if(args.length < 1){
-			printHelp();
-		}
-		if(args[0].equals("-c")){
-			if(args.length < 2){
-				System.out.println("Path for the second IF needed");
-				System.exit(0);
-			}
-			String filenameTemplate = args[1]+ ApplicationSetup.FILE_SEPARATOR + ApplicationSetup.TERRIER_INDEX_PREFIX;
-			System.out.println("Checking "+ApplicationSetup.INVERTED_FILENAME+" with "+filenameTemplate+ApplicationSetup.IFSUFFIX);			
-			checkIF(filenameTemplate+ApplicationSetup.IFSUFFIX, filenameTemplate+ApplicationSetup.LEXICONSUFFIX);	
-		}
-		else if(args[0].equals("-r")){
-			if(args.length < 2){
-				System.out.println("Output file name needed");
-				System.exit(0);
-			}
-			System.out.println("Reversing to "+args[1]);		
-			reverse(args[0]);			
-		}
-		else if(args[0].equals("-h")){
-			printHelp();
-		}
-		else{
-			System.out.println("Option "+args[0]+" not recognised");
-			printHelp();
-		}			
-		System.exit(0);
-	}
-	
-	/**
-	 * Prints the help.<br>
-	 * Use: java ReverseFile  -c <newInvertedFilePath>	(checks if the content of two inverted files match) <br>
-	 * Use: java ReverseFile  -r <outputFile>		(reverses the byte encoding of the inverted file).
-	 */
-	private static void printHelp(){
-		System.out.println("Use: java ReverseFile  -c <newInvertedFilePath>	(checks if the content of two inverted files match)");
-		System.out.println("Use: java ReverseFile  -r <outputFile>		(reverses the byte encoding of the inverted file)");	
-		System.exit(-1);
-	}	
-}
Index: src/uk/ac/gla/terrier/structures/upgrading/UpgradeIndex.java
===================================================================
--- src/uk/ac/gla/terrier/structures/upgrading/UpgradeIndex.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/upgrading/UpgradeIndex.java	(working copy)
@@ -1,69 +0,0 @@
-/*
- * Terrier - Terabyte Retriever
- * Webpage: http://ir.dcs.gla.ac.uk/terrier
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- *
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is UpgradeIndex.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *  Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original author)
- */
-package uk.ac.gla.terrier.structures.upgrading;
-
-import uk.ac.gla.terrier.structures.Index;
-
-/** Command line utility to upgrade a Terrier index. 
-  * <p><b>Usage:</b><br>
-  * <tt>bin/anyclass.sh uk.ac.gla.terrier.structures.upgrading.UpgradeIndex SourceIndexPath SourceIndexPrefix DestIndexPath DestIndexPrefix</tt>.
-  * <p><b>Notes:</b><br>
-  * <ol><li>Upgrading from Terrier 1.x indices: The upgrader can figure out most things, but you must specify the number of fields correctly 
-  * in the property <tt>FieldTags.process</tt> - for example <tt>FieldTags.process=TITLE,H1</tt>. 
-  * </ol>
-  * @author Craig Macdonald
-  * @since 2.0
-  * @version $Revision: 1.3 $ */
-public class UpgradeIndex
-{
-	public static void main (String[] args)
-	{
-		if (args.length != 4)
-		{
-			System.err.println("Usage: uk.ac.gla.terrier.structures.upgrading.UpgradeIndex"
-				+ " sourceIndexPath sourceIndexPrefix destIndexPath destIndexPrefix");
-			System.err.println("Upgrades a Terrier index to the latest version");
-			return;
-		}
-		final Index sourceIndex = Index.createIndex(args[0], args[1]);
-		final String sourceVersion = sourceIndex.getIndexProperty("index.terrier.version", "unknown");
-		if (sourceVersion.startsWith("1.0"))
-		{
-			System.err.println("Upgrading Terrier 1.x.x index");
-			try{
-				new Terrier1xxIndexUpgrader(sourceIndex, Index.createNewIndex(args[2], args[3])).upgrade();
-			} catch (Exception e) {
-				System.err.println("Problem upgrading index:"+ e);
-				e.printStackTrace();
-			}
-			System.err.println("Done");
-		}
-		else
-		{
-			System.err.println("Sorry, only Terrer version 1 indices can be upgraded. Your index format is + '"+sourceVersion+"'");	
-		}
-	}
-}
Index: src/uk/ac/gla/terrier/structures/upgrading/Terrier1xxIndexUpgrader.java
===================================================================
--- src/uk/ac/gla/terrier/structures/upgrading/Terrier1xxIndexUpgrader.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/upgrading/Terrier1xxIndexUpgrader.java	(working copy)
@@ -1,214 +0,0 @@
-/*
- * Terrier - Terabyte Retriever
- * Webpage: http://ir.dcs.gla.ac.uk/terrier
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- *
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is 1xxIndexUpgrader.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *  Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original author)
- */
-
-
-package uk.ac.gla.terrier.structures.upgrading;
-
-import uk.ac.gla.terrier.structures.*;
-import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
-import uk.ac.gla.terrier.structures.indexing.DocumentIndexBuilder;
-import uk.ac.gla.terrier.utility.FieldScore;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import java.io.IOException;
-/** Class to upgrade a Terrier 1.x index. Can be invoked from the command line using uk.ac.gla.terrier.structures.upgrading.UpgradeIndex 
-  * @see uk.ac.gla.terrier.structures.upgrading.UpgradeIndex 
-  * @since 2.0 
-  * @author Craig Macdonald
-  * @version $Revision: 1.3 $ */
-public class Terrier1xxIndexUpgrader extends aIndexUpgrader
-{
-	/** Create a new IndexUpgrader from sourceIndex to destIndex.
-	  * @param sourceIndex Index to use as the source of the upgrade 
-	  * @param destIndex Fresh Index to use as the destination of the upgrade
-	  */
-	public Terrier1xxIndexUpgrader(Index sourceIndex, Index destIndex)
-	{
-		super(sourceIndex, destIndex);
-	}
-
-	 /** upgrade this index, source Index read, destination Index written */
-    public void upgrade() throws Exception
-	{
-		//inverted index & lexicon
-		if (sourceIndex.hasIndexStructure("inverted") && sourceIndex.hasIndexStructure("lexicon"))
-		{
-			try{
-				upgradeInvertedIndex();
-			} catch (Exception e) {
-				throw new Exception("Unable to upgrade inverted index", e);
-			}
-		}
-
-		//direct index & direct index
-		if (sourceIndex.hasIndexStructure("direct"))
-		{
-			try{
-				upgradeDirectIndex();
-			} catch (Exception e) {
-				throw new Exception("Unable to upgrade direct index", e);
-			}
-			upgradeDirectIndex();
-		}//document index only
-		else if (sourceIndex.hasIndexStructure("document"))
-		{
-			try{
-				upgradeDocumentIndex();
-			} catch (Exception e) {
-				throw new Exception("Unable to upgrade document index", e);
-			}
-
-		}
-	}
-
-	/** Upgrade the inverted index, with lexicon */
-	protected void upgradeInvertedIndex() throws IOException
-	{
-		final InvertedIndexInputStream iiis = (InvertedIndexInputStream)
-			sourceIndex.getIndexStructureInputStream("inverted");
-		final LexiconInputStream lis = (LexiconInputStream)sourceIndex.getIndexStructureInputStream("lexicon");
-		final boolean UTFIndexing = lis instanceof UTFLexiconInputStream;
-		final LexiconOutputStream los = UTFIndexing
-			? new UTFLexiconOutputStream(destIndex.getPath(), destIndex.getPrefix())
-			: new LexiconOutputStream(destIndex.getPath(), destIndex.getPrefix());
-		
-		final String OutDfFilename = destIndex.getPath() + 
-				ApplicationSetup.FILE_SEPARATOR + 
-				destIndex.getPrefix() + ApplicationSetup.IFSUFFIX;
-		
-		final DirectInvertedOutputStream ios = iiis instanceof BlockInvertedIndexInputStream
-			? new BlockDirectInvertedOutputStream(OutDfFilename, FieldScore.FIELDS_COUNT)
-			: new DirectInvertedOutputStream(OutDfFilename, FieldScore.FIELDS_COUNT);
-		
-		int[][] termPostings = null;
-        LexiconEntry le =  null;
-		//for each posting in the inverted index
-        while((termPostings = iiis.getNextDocuments()) != null)
-        {
-            ios.writePostings(termPostings, termPostings[0][0]+1);
-            //write new offsets of new lexicon (los), using information from lis.
-            los.writeNextEntry(lis.getTerm(),lis.getTermId(),lis.getNt(),lis.getTF(),
-				ios.getByteOffset(),ios.getBitOffset());
-		}
-		destIndex.addIndexStructure(
-                "lexicon",
-                UTFIndexing 
-					? "uk.ac.gla.terrier.structures.UTFLexicon" 
-					: "uk.ac.gla.terrier.structures.Lexicon" );
-        destIndex.addIndexStructureInputStream(
-                "lexicon",
-                UTFIndexing 
-					? "uk.ac.gla.terrier.structures.UTFLexiconInputStream" 
-					: "uk.ac.gla.terrier.structures.LexiconInputStream");
-		destIndex.setIndexProperty("num.Terms", ""+los.getNumberOfTermsWritten() );
-        destIndex.setIndexProperty("num.Pointers", ""+los.getNumberOfPointersWritten());
-        destIndex.setIndexProperty("num.Tokens", ""+los.getNumberOfTokensWritten());
-		destIndex.setIndexProperty("num.Documents", ""+destIndex.getCollectionStatistics().getNumberOfDocuments());
-		ios.close();
-		los.close();
-        LexiconBuilder.createLexiconIndex(destIndex);
-		//TODO create lexicon hash if enabled?
-		destIndex.flush();
-	}
-
-	/** Upgrade the direct index, with document index */
-	protected void upgradeDirectIndex() throws IOException
-	{
-
-		final DocumentIndexBuilder docidOutput = new DocumentIndexBuilder(destIndex);
-		
-        final DirectIndex dfInput1 = sourceIndex.getDirectIndex();
-        final boolean use_blocks = dfInput1 instanceof BlockDirectIndex;
-		String OutDfFilename = destIndex.getPath() + 
-				ApplicationSetup.FILE_SEPARATOR + 
-				destIndex.getPrefix() + ApplicationSetup.DF_SUFFIX;
-        final DirectInvertedOutputStream dfOutput = use_blocks
-			? new uk.ac.gla.terrier.structures.BlockDirectInvertedOutputStream(OutDfFilename, FieldScore.FIELDS_COUNT)
-			: new uk.ac.gla.terrier.structures.DirectInvertedOutputStream(OutDfFilename, FieldScore.FIELDS_COUNT);
-
-        final DocumentIndexInputStream docidInput1 = (DocumentIndexInputStream)sourceIndex.getIndexStructureInputStream("document");
-
-        while (docidInput1.readNextEntry() >= 0) {
-            if (docidInput1.getDocumentLength() > 0)
-            {
-                final int[][] terms = dfInput1.getTerms(docidInput1.getDocumentId());
-                dfOutput.writePostings(terms, terms[0][0]+1);
-            }
-            long endByte = dfOutput.getByteOffset();
-            byte endBit = dfOutput.getBitOffset();
-            endBit--;
-
-            if (endBit < 0 && endByte > 0) {
-                endBit = 7;
-                endByte--;
-            }
-
-            docidOutput.addEntryToBuffer(docidInput1.getDocumentNumber(),
-                                    docidInput1.getDocumentLength(),
-                                    new FilePosition(endByte, endBit));
-        }
-        dfInput1.close();
-        docidInput1.close();
-		docidOutput.finishedCollections();
-        docidOutput.close();
-
-		destIndex.addIndexStructure(
-                    "direct",
-					use_blocks
-						? "uk.ac.gla.terrier.structures.BlockDirectIndex"
-						: "uk.ac.gla.terrier.structures.DirectIndex",
-                    "uk.ac.gla.terrier.structures.DocumentIndex,java.lang.String,java.lang.String",
-                    "document,path,prefix");
-        destIndex.addIndexStructureInputStream(
-                    "direct",
-					use_blocks
-						? "uk.ac.gla.terrier.structures.BlockDirectIndexInputStream"
-						: "uk.ac.gla.terrier.structures.DirectIndexInputStream",
-                    "uk.ac.gla.terrier.structures.DocumentIndexInputStream,java.lang.String,java.lang.String",
-                    "document-inputstream,path,prefix");
-        destIndex.flush();
-	}
-
-	/** Upgrade the document index only */
-	protected void upgradeDocumentIndex() throws IOException
-	{
-		final DocumentIndexBuilder docidOutput = new DocumentIndexBuilder(destIndex);
-
-		//opening the first set of files.
-        final DocumentIndexInputStream docidInput1 = (DocumentIndexInputStream)sourceIndex.getIndexStructureInputStream("document");
-
-        //traversing the first set of files, without any change
-        while (docidInput1.readNextEntry() >= 0) {
-
-            docidOutput.addEntryToBuffer(docidInput1.getDocumentNumber(),
-                                    docidInput1.getDocumentLength(),
-                                    new FilePosition(0L, (byte)0));
-        }
-        docidInput1.close();
-		docidOutput.finishedCollections();
-        docidOutput.close();
-		destIndex.flush();
-	}
-}
Index: src/uk/ac/gla/terrier/structures/upgrading/aIndexUpgrader.java
===================================================================
--- src/uk/ac/gla/terrier/structures/upgrading/aIndexUpgrader.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/upgrading/aIndexUpgrader.java	(working copy)
@@ -1,48 +0,0 @@
-/*
- * Terrier - Terabyte Retriever
- * Webpage: http://ir.dcs.gla.ac.uk/terrier
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- *
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is aIndexUpgrader.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *  Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original author)
- */
-package uk.ac.gla.terrier.structures.upgrading;
-
-import uk.ac.gla.terrier.structures.Index;
-/** Abstract class for upgrading an index to another index 
- * 	@version $Revision: 1.3 $ 
- * 	@author Craig Macdonald 
- * 	@since 2.0 */
-public abstract class aIndexUpgrader
-{
-	/** source Index */
-	final Index sourceIndex;
-	/** destination Index */
-	final Index destIndex;
-	/** record the source and dest indices */
-	public aIndexUpgrader(Index sourceIndex, Index destIndex)
-	{	
-		this.sourceIndex = sourceIndex;
-		this.destIndex = destIndex;
-	}
-
-	/** upgrade this index, source to destination */
-	public abstract void upgrade() throws Exception;
-}
Index: src/uk/ac/gla/terrier/structures/UTFBlockLexiconOutputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/UTFBlockLexiconOutputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/UTFBlockLexiconOutputStream.java	(working copy)
@@ -1,153 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is BlockLexiconOutputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.DataOutput;
-import java.io.File;
-import java.io.IOException;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.StringTools;
-/**
- * An output stream for writing the lexicon to a file sequentially.
- * @author Douglas Johnson, Vassilis Plachouras
- * @version $Revision: 1.10 $
- */
-public class UTFBlockLexiconOutputStream extends BlockLexiconOutputStream {
-	/** A zero buffer for writing to the file.*/
-	private static byte[] zeroBuffer =
-		new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-	/** 
-	 * The number of different blocks in which a term appears.
-	 * This is used only during the creation of the inverted
-	 * file and it can be ignored afterwards.
-	 */
-	protected int blockFrequency;
-	/**
-	 * A default constructor.
-	 */
-	public UTFBlockLexiconOutputStream() {
-		super();
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public UTFBlockLexiconOutputStream(String filename) {
-		super(filename);
-	}
-	/**
-	 * A constructor given the file.
-	 * @param file java.io.File the lexicon file.
-	 */
-	public UTFBlockLexiconOutputStream(File file) {
-		super(file);
-	}
-	
-	/** Create a lexicon using the specified data stream */
-	public UTFBlockLexiconOutputStream(DataOutput out){
-		super(out);
-	}
-	/**
-	 * Write a lexicon entry.
-	 * @return the number of bytes written if there is no error, otherwise returns -1 in case of EOF
-	 * @throws IOException if an I/O error occurs
-	 * @param term the string representation of the term
-	 * @param termId the terms integer identifier
-	 * @param documentFrequency the term's document frequency in the collection
-	 * @param termFrequency the term's frequency in the collection
-	 * @param endOffset the term's ending byte offset in the inverted file
-	 * @param endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	public int writeNextEntry(
-		String term,
-		int termId,
-		int documentFrequency,
-		int termFrequency,
-		int blockFrequency,
-		long endOffset,
-		byte endBitOffset)
-		throws IOException {
-		numPointersWritten += documentFrequency;
-        numTokensWritten += termFrequency;
-		numTermsWritten++;
-		lexiconStream.writeUTF(term);
-		lexiconStream.write(
-			zeroBuffer,
-			0,
-			ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-		lexiconStream.writeInt(termId);
-		lexiconStream.writeInt(documentFrequency);
-		lexiconStream.writeInt(blockFrequency);
-		lexiconStream.writeInt(termFrequency);
-		lexiconStream.writeLong(endOffset);
-		lexiconStream.writeByte(endBitOffset);
-		return Lexicon.lexiconEntryLength;
-	}
-	/**
-	 * Write a lexicon entry.
-	 * @return the number of bytes written if there is no error, otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 * @param term the byte array representation of the term
-	 * @param termId the terms integer identifier
-	 * @param documentFrequency the term's document frequency in the collection
-	 * @param termFrequency the term's frequency in the collection
-	 * @param endOffset the term's ending byte offset in the inverted file
-	 * @param endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	public int writeNextEntry(
-		byte[] term,
-		int termId,
-		int documentFrequency,
-		int termFrequency,
-		int blockFrequency,
-		long endOffset,
-		byte endBitOffset)
-		throws IOException {
-		final int length = term.length;
-		numPointersWritten += documentFrequency;
-        numTokensWritten += termFrequency;
-		lexiconStream.write(term, 0, length);
-		lexiconStream.write(
-   	        zeroBuffer,
-       	    0,
-           	2+ApplicationSetup.STRING_BYTE_LENGTH - length);
-		lexiconStream.writeInt(termId);
-		lexiconStream.writeInt(documentFrequency);
-		lexiconStream.writeInt(blockFrequency);
-		lexiconStream.writeInt(termFrequency);
-		lexiconStream.writeLong(endOffset);
-		lexiconStream.writeByte(endBitOffset);
-		return Lexicon.lexiconEntryLength;
-	}
-	/**
-	 * Sets the block frequency for the given term
-	 * @param blockFrequency The new block frequency
-	 */
-	public void setBF(int blockFrequency) {
-		this.blockFrequency = blockFrequency;
-	}
-}
Index: src/uk/ac/gla/terrier/structures/BlockLexicon.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockLexicon.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/BlockLexicon.java	(working copy)
@@ -1,305 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is BlockLexicon.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.io.RandomDataOutput;
-/**
- * A lexicon class that saves the number of
- * different blocks a term appears in. It is used only during 
- * creating the block inverted index. After the block inverted
- * index has been created, the block lexicon is transformed into 
- * a lexicon.
- * @author Douglas Johnson, Vassilis Plachouras
- * @version $Revision: 1.33 $
- */
-public class BlockLexicon extends Lexicon {
-	/** The logger used */
-	private static Logger logger = Logger.getRootLogger();
-	/** The block frequency of the term */
-	protected int blockFrequency;
-	/**
-	 * The size in bytes of an entry in the lexicon file. An entry corresponds
-	 * to a string, an int (termCode), an int (docf), an int (tf), a long (the
-	 * offset of the end of the term's entry in bytes in the inverted file) and
-	 * a byte (the offset in bits of the last byte of the term's entry in the
-	 * inverted file.
-	 */
-	public static final int lexiconEntryLength = 
-		ApplicationSetup.STRING_BYTE_LENGTH //the string representation
-		+ 16 //the four integers
-		+ 8 //the long
-		+ 1; //the byte
-	/**
-	 * A default constructor.
-	 */
-	public BlockLexicon() {
-		super();
-		
-		try {
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)BlockLexicon.lexiconEntryLength);
-			bufferInput.mark(3 * lexiconEntryLength);
-		} catch (IOException ioe) {
-			logger.fatal
-					("Input/output exception while opening for reading the lexicon file. Stack trace follows",ioe);
-		}
-		inputStreamClass = BlockLexiconInputStream.class;
-	}
-	/**
-	 * Constructs an instace of BlockLexicon and opens the corresponding file.
-	 * @param lexiconName the name of the lexicon file.
-	 */
-	public BlockLexicon(String lexiconName) {
-		super(lexiconName);
-		try {
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)BlockLexicon.lexiconEntryLength);
-			bufferInput.mark(3 * lexiconEntryLength);
-		} catch (IOException ioe) {
-			logger.fatal("Input/output exception while opening for reading the " +
-							"lexicon file. Stack trace follows",ioe);
-		}
-		inputStreamClass = BlockLexiconInputStream.class;
-	}
-
-    public BlockLexicon(String path, String prefix)
-    {
-        this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-    }
-	
-	/**
-	 * Finds the term given its term code.
-	 * 
-	 * @return true if the term is found, else return false
-	 * @param termId
-	 *			the term's id
-	 */
-	public boolean findTerm(int termId) {
-		try {
-			idToOffsetFile.seek((long)termId * 8L);
-			long lexiconOffset = idToOffsetFile.readLong();
-			if (lexiconOffset == 0) {
-				startOffset = 0;
-				startBitOffset = 0;
-				lexiconFile.seek(lexiconOffset);
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH);
-				term = new String(bt);
-				this.termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			} else {
-				lexiconFile.seek(lexiconOffset - 9L);
-				//goes to the lexicon offset minus the long offset and a byte
-				startOffset = lexiconFile.readLong();
-				startBitOffset = lexiconFile.readByte();
-				startBitOffset++;
-				if (startBitOffset == 8) {
-					startBitOffset = 0;
-					startOffset++;
-				}
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH);
-				term = new String(bt);
-				this.termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.fatal("Input/Output exception while reading the idToOffset file. Stack trace follows.",ioe);
-		}
-		return false;
-	}
-	/**
-	 * Performs a binary search in the lexicon in order to locate the given
-	 * term. If the term is located, the properties termCharacters,
-	 * documentFrequency, termFrequency, startOffset, startBitOffset, endOffset
-	 * and endBitOffset contain the values related to the term.
-	 * 
-	 * @param _term the term to search for.
-	 * @return true if the term is found, and false otherwise.
-	 */
-	public boolean findTerm(String _term) {
-		Arrays.fill(buffer, (byte) 0);
-		Arrays.fill(bt, (byte) 0);
-		//byte[] bt = _term.getBytes();
-		final int termLength = ApplicationSetup.STRING_BYTE_LENGTH;			
-		//int _termId = 0;
-		long low = -1;
-		long high = numberOfLexiconEntries;
-		long i;
-		while (high-low>1) {
-			
-			i = (long)(high+low)/2;
-			try {
-				lexiconFile.seek((long)i * (long)BlockLexicon.lexiconEntryLength);
-				lexiconFile.readFully(buffer, 0, termLength);
-			} catch (IOException ioe) {
-				logger.fatal(
-					"Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-			}
-			
-			int compareResult = 0;
-			compareResult = _term.compareTo(new String(buffer).trim());
-			
-			if (compareResult < 1)
-				high = i;
-			else
-				low = i;			
-		}
-		if (high == numberOfLexiconEntries)
-			return false;
-		try {
-			lexiconFile.seek((long)high * (long)BlockLexicon.lexiconEntryLength);
-			lexiconFile.readFully(buffer, 0, termLength);
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-		}	
-		
-		if (_term.compareTo(new String(buffer).trim())==0) {
-			try {
-				findTerm(lexiconFile.readInt());
-				return true;
-			}catch(IOException ioe) {
-				logger.fatal("Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-			}
-		}
-		return false;
-	}
-
-	/**
-	 * Returns the block frequency for the given term
-	 * @return int The block frequency for the given term
-	 */
-	public int getBlockFrequency() {
-		return blockFrequency;
-	}
-	/**
-	 * Seeks the i-th entry of the lexicon.
-	 * @param i
-	 *			The index of the entry we are looking for.
-	 * @return true if the entry was found, false otherwise.
-	 */
-	public boolean seekEntry(int i) {
-		try {
-			if (i > numberOfLexiconEntries)
-				return false;
-			if (i == 0) {
-				lexiconFile.seek((long)i * (long)lexiconEntryLength);
-				startOffset = 0;
-				startBitOffset = 0;
-				lexiconFile.readFully(buffer, 0, ApplicationSetup.STRING_BYTE_LENGTH);
-				term = new String(buffer);
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			} else {
-				lexiconFile.seek((long)i * (long)lexiconEntryLength - (long)lexiconEntryLength
-						+ (long)ApplicationSetup.STRING_BYTE_LENGTH + 12L);
-				startOffset = lexiconFile.readLong();
-				startBitOffset = lexiconFile.readByte();
-				startBitOffset++;
-				if (startBitOffset == 8) {
-					startBitOffset = 0;
-					startOffset++;
-				}
-				lexiconFile.readFully(buffer, 0, ApplicationSetup.STRING_BYTE_LENGTH);
-				term = new String(buffer);
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.fatal("Input/Output exception while reading the idToOffset file. Stack trace follows.",ioe);
-		}
-		return false;
-	}
-	
-	/**
-	 * In an already stored entry in the lexicon file, the information about the
-	 * term frequency, the endOffset in bytes, and the endBitOffset in the last
-	 * byte, is updated. The term is specified by the index of the entry.
-	 * 
-	 * @return true if the information is updated properly, otherwise return
-	 *		 false
-	 * @param i the i-th entry
-	 * @param frequency the term's Frequency
-	 * @param endOffset the offset of the ending byte in the inverted file
-	 * @param endBitOffset the offset in bits in the ending byte in the term's entry in
-	 *			inverted file
-	 * @deprecated The BlockLexicon is used during indexing, but not during
-	 *			 retrieval.
-	 */
-	public boolean updateEntry(int i, int frequency, long endOffset,
-			byte endBitOffset) {
-		if (! (lexiconFile instanceof RandomDataOutput))
-            return false;
-        RandomDataOutput _lexiconFile = (RandomDataOutput)lexiconFile;
-		try {
-			long lexiconOffset = (long)i * (long)lexiconEntryLength;
-			//we seek the offset where the frequency should be writen
-			_lexiconFile.seek((long)lexiconOffset
-					+ (long)ApplicationSetup.STRING_BYTE_LENGTH + 8L);
-			_lexiconFile.writeInt(frequency);
-			_lexiconFile.writeLong(endOffset);
-			_lexiconFile.writeByte(endBitOffset);
-		} catch (IOException ioe) {
-			logger.fatal("Input/Output exception while updating the lexicon file. Stack trace follows.");
-		}
-		return false;
-	}
-
-	public static int numberOfEntries(File f)
-	{
-		return (int)(f.length()/ (long)lexiconEntryLength);
-	}
-
-	public static int numberOfEntries(String filename)
-	{
-		return numberOfEntries(new File(filename));
-	}
-
-}
Index: src/uk/ac/gla/terrier/structures/BitIndexPointer.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BitIndexPointer.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/BitIndexPointer.java	(revision 0)
@@ -0,0 +1,11 @@
+package uk.ac.gla.terrier.structures;
+
+/**
+ * A {@link BitFilePosition} that additionally reports a number of entries.
+ * NOTE(review): presumably the count of entries stored at the pointed-to
+ * position - confirm against the implementing classes.
+ */
+public interface BitIndexPointer extends BitFilePosition {
+	/** Returns the number of entries associated with this pointer. */
+	int getNumberOfEntries();
+}
Index: src/uk/ac/gla/terrier/structures/BlockLexiconEntry.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockLexiconEntry.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/BlockLexiconEntry.java	(revision 0)
@@ -0,0 +1,103 @@
+package uk.ac.gla.terrier.structures;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * A lexicon entry that extends {@link BasicLexiconEntry} and additionally
+ * stores a block count, exposed through {@link BlockEntryStatistics}.
+ */
+public class BlockLexiconEntry extends BasicLexiconEntry implements BlockEntryStatistics {
+	/** The block count of this entry; serialised as one int after the superclass fields. */
+	int blockCount;
+	
+	/** Factory creating {@link BlockLexiconEntry} instances. */
+	public static class Factory extends BasicLexiconEntry.Factory
+	{
+		/**
+		 * Returns the size reported by the superclass factory plus 4, for the
+		 * extra int that {@link BlockLexiconEntry#write(DataOutput)} emits.
+		 */
+		public int getSize() {
+			return super.getSize() + 4;
+		}
+		/** Returns a new BlockLexiconEntry created with the no-argument constructor. */
+		public LexiconEntry newInstance() {
+			return new BlockLexiconEntry();
+		}
+	}
+	
+	/** Creates an entry via the superclass no-argument constructor; blockCount is not set here. */
+	public BlockLexiconEntry() {
+		super();
+	}
+	
+	/**
+	 * Creates an entry, forwarding tid, n_t, TF and offset to the superclass constructor.
+	 * @param _blockCount the block count to store in this entry
+	 */
+	public BlockLexiconEntry(int tid, int n_t, int TF, BitFilePosition offset, int _blockCount) {
+		super(tid, n_t, TF, offset);
+		blockCount = _blockCount;
+	}
+	/**
+	 * Creates an entry, forwarding tid, n_t, TF and the byte and bit start offsets to the superclass constructor.
+	 * @param _blockCount the block count to store in this entry
+	 */
+	public BlockLexiconEntry(int tid, int n_t, int TF, long _startOffset, byte _startBitOffset, int _blockCount) {
+		super(tid, n_t, TF, _startOffset, _startBitOffset);
+		blockCount = _blockCount;
+	}
+	/**
+	 * Creates an entry without an offset, forwarding tid, n_t and TF to the superclass constructor.
+	 * @param _blockCount the block count to store in this entry
+	 */
+	public BlockLexiconEntry(int tid, int n_t, int TF, int _blockCount) {
+		super(tid, n_t, TF);
+		blockCount = _blockCount;
+	}
+	/** {@inheritDoc} */
+	public int getBlockCount()
+	{
+		return blockCount;
+	}
+	/** Reads the superclass fields from in, then the block count as one int. */
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		super.readFields(in);
+		blockCount = in.readInt();
+		
+	}
+	/** Writes the superclass fields to out, then the block count as one int. */
+	@Override
+	public void write(DataOutput out) throws IOException {
+		super.write(out);
+		out.writeInt(blockCount);
+	}
+	
+	/**
+	 * Adds le to this entry via the superclass; the block count is only
+	 * increased when le is a {@link BlockEntryStatistics}.
+	 */
+	@Override
+	public void add(EntryStatistics le)
+	{
+		super.add(le);
+		if (le instanceof BlockEntryStatistics)
+			blockCount += ((BlockEntryStatistics)le).getBlockCount();
+	}
+	
+	/**
+	 * Subtracts le from this entry via the superclass; the block count is only
+	 * decreased when le is a {@link BlockEntryStatistics}.
+	 */
+	@Override
+	public void subtract(EntryStatistics le)
+	{
+		super.subtract(le);
+		if (le instanceof BlockEntryStatistics)
+			blockCount -= ((BlockEntryStatistics)le).getBlockCount();
+	}
+	
+}
Index: src/uk/ac/gla/terrier/structures/BlockLexiconOutputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockLexiconOutputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/BlockLexiconOutputStream.java	(working copy)
@@ -1,154 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is BlockLexiconOutputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.DataOutput;
-import java.io.File;
-import java.io.IOException;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-/**
- * An output stream for writing the lexicon to a file sequentially.
- * @author Douglas Johnson, Vassilis Plachouras
- * @version $Revision: 1.17 $
- */
-public class BlockLexiconOutputStream extends LexiconOutputStream {
-	/** A zero buffer for writing to the file.*/
-	private static byte[] zeroBuffer =
-		new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-	/** 
-	 * The number of different blocks in which a term appears.
-	 * This is used only during the creation of the inverted
-	 * file and it can be ignored afterwards.
-	 */
-	protected int blockFrequency;
-	/**
-	 * A default constructor.
-	 */
-	public BlockLexiconOutputStream() {
-		super();
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public BlockLexiconOutputStream(String filename) {
-		super(filename);
-	}
-	/**
-	 * A constructor given the file.
-	 * @param file java.io.File the lexicon file.
-	 */
-	public BlockLexiconOutputStream(File file) {
-		super(file);
-	}
-	
-	/** Create a lexicon using the specified data stream */
-	public BlockLexiconOutputStream(DataOutput out){
-		super(out);
-	}
-	/**
-	 * Write a lexicon entry.
-	 * @return the number of bytes written if there is no error, otherwise returns -1 in case of EOF
-	 * @throws IOException if an I/O error occurs
-	 * @param term the string representation of the term
-	 * @param termId the terms integer identifier
-	 * @param documentFrequency the term's document frequency in the collection
-	 * @param termFrequency the term's frequency in the collection
-	 * @param endOffset the term's ending byte offset in the inverted file
-	 * @param endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	public int writeNextEntry(
-		String term,
-		int termId,
-		int documentFrequency,
-		int termFrequency,
-		int blockFrequency,
-		long endOffset,
-		byte endBitOffset)
-		throws IOException {
-		byte[] tmpBytes = term.getBytes();
-		final int length = tmpBytes.length;
-		numPointersWritten += documentFrequency;
-        numTokensWritten += termFrequency;
-		numTermsWritten++;
-		lexiconStream.write(tmpBytes, 0, length);
-		lexiconStream.write(
-			zeroBuffer,
-			0,
-			ApplicationSetup.STRING_BYTE_LENGTH - length);
-		lexiconStream.writeInt(termId);
-		lexiconStream.writeInt(documentFrequency);
-		lexiconStream.writeInt(blockFrequency);
-		lexiconStream.writeInt(termFrequency);
-		lexiconStream.writeLong(endOffset);
-		lexiconStream.writeByte(endBitOffset);
-		return BlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * Write a lexicon entry.
-	 * @return the number of bytes written if there is no error, otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 * @param term the byte array representation of the term
-	 * @param termId the terms integer identifier
-	 * @param documentFrequency the term's document frequency in the collection
-	 * @param termFrequency the term's frequency in the collection
-	 * @param endOffset the term's ending byte offset in the inverted file
-	 * @param endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	public int writeNextEntry(
-		byte[] term,
-		int termId,
-		int documentFrequency,
-		int blockFrequency,
-		int termFrequency,
-		long endOffset,
-		byte endBitOffset)
-		throws IOException {
-		final int length = term.length;
-		numPointersWritten += documentFrequency;
-        numTokensWritten += termFrequency;
-		lexiconStream.write(term, 0, term.length);
-		lexiconStream.write(
-			zeroBuffer,
-			0,
-			ApplicationSetup.STRING_BYTE_LENGTH - length);	
-		lexiconStream.writeInt(termId);
-		lexiconStream.writeInt(documentFrequency);
-		lexiconStream.writeInt(blockFrequency);
-		lexiconStream.writeInt(termFrequency);
-		lexiconStream.writeLong(endOffset);
-		lexiconStream.writeByte(endBitOffset);
-		return BlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * Sets the block frequency for the given term
-	 * @param blockFrequency The new block frequency
-	 */
-	public void setBF(int blockFrequency) {
-		this.blockFrequency = blockFrequency;
-	}
-}
Index: src/uk/ac/gla/terrier/structures/BasicTermStatsLexiconEntry.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BasicTermStatsLexiconEntry.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/BasicTermStatsLexiconEntry.java	(revision 0)
@@ -0,0 +1,89 @@
+package uk.ac.gla.terrier.structures;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+/** A LexiconEntry that holds only term statistics (TF, n_t and termId); it stores no file position. */
+public class BasicTermStatsLexiconEntry extends LexiconEntry {
+	protected int n_t;
+	protected int TF;
+	protected int termId;
+	/** Creates an entry with all three statistics left at 0. */
+	public BasicTermStatsLexiconEntry() {}
+	/** Creates an entry from the given values; note the argument order (TF, n_t, termId). */
+	public BasicTermStatsLexiconEntry(int _TF, int _n_t, int _termId)
+	{
+		TF = _TF;
+		n_t = _n_t;
+		termId = _termId;
+	}
+	/** @return n_t */
+	public int getDocumentFrequency() {
+		return n_t;
+	}
+	/** Replaces n_t. */
+	public void setDocumentFrequency(int _n_t) {
+		n_t = _n_t;
+	}
+	/** @return TF */
+	public int getFrequency() {
+		return TF;
+	}
+	/** Replaces TF. */
+	public void setFrequency(int _TF) {
+		TF = _TF;
+	}
+	/** @return termId */
+	public int getTermId() {
+		return termId;
+	}
+	/** Replaces termId. */
+	public void setTermId(int _termId) {
+		termId = _termId;
+	}
+	/** Replaces all three statistics at once; same argument order as the constructor. */
+	public void setAll(int _TF, int _n_t, int _termId) {
+		TF = _TF;
+		n_t = _n_t;
+		termId = _termId;
+	}
+	/** @return n_t, i.e. the same value as getDocumentFrequency() */
+	public int getNumberOfEntries() {
+		return n_t;
+	}
+	/** @return always 0, because this entry keeps no position */
+	public byte getBits() {
+		return 0;
+	}
+	/** @return always 0, because this entry keeps no position */
+	public long getBytes() {
+		return 0;
+	}
+	/** Does nothing: both arguments are ignored. */
+	public void setPosition(long bytes, byte bits)
+	{
+	}
+	/** Reads three ints in the order TF, n_t, termId. */
+	public void readFields(DataInput in) throws IOException {
+		TF = in.readInt();
+		n_t = in.readInt();
+		termId = in.readInt();
+	}
+	/** Writes three ints in the order TF, n_t, termId (the order readFields expects). */
+	public void write(DataOutput out) throws IOException {
+		out.writeInt(TF);
+		out.writeInt(n_t);
+		out.writeInt(termId);
+	}
+	/** Adds the frequency and document frequency of le; termId is left untouched. */
+	public void add(EntryStatistics le) {
+		TF += le.getFrequency();
+		n_t += le.getDocumentFrequency();
+	}
+	/** Subtracts the frequency and document frequency of le; termId is left untouched. */
+	public void subtract(EntryStatistics le) {
+		this.n_t -= le.getDocumentFrequency();
+		this.TF  -= le.getFrequency();
+	}
+
+}
Index: src/uk/ac/gla/terrier/structures/FilePosition.java
===================================================================
--- src/uk/ac/gla/terrier/structures/FilePosition.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/FilePosition.java	(working copy)
@@ -38,7 +38,7 @@
  * a bit offset of 2.
  * @author Craig Macdonald, Vassilis Plachouras &amp; John Kane
  */
-public class FilePosition
+public class FilePosition implements BitFilePosition
 {
 	/** The number of bytes a file position could be converted to
 	 * - 8 for the byte's long, 1 for the bits
@@ -83,6 +83,15 @@
 		Bits = in.Bits;
 	}
 
+	public long getBytes() { return Bytes; } // accessor for the Bytes field
+	public byte getBits() { return Bits; } // accessor for the Bits field
+	/** Overwrites both components of this position: Bytes with bytes and Bits with bits. */
+	public void setPosition(long bytes, byte bits)
+	{
+		Bytes = bytes;
+		Bits = bits;
+	}
+
 	/** How large is this object when serialized */	
 	public static int sizeInBytes()
 	{
Index: src/uk/ac/gla/terrier/structures/InvertedIndexInputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/InvertedIndexInputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/InvertedIndexInputStream.java	(working copy)
@@ -27,11 +27,12 @@
 package uk.ac.gla.terrier.structures;
 
 import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
 
 import uk.ac.gla.terrier.compression.BitIn;
 import uk.ac.gla.terrier.compression.BitInputStream;
 import uk.ac.gla.terrier.compression.OldBitInputStream;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
 
 
@@ -43,7 +44,7 @@
 public class InvertedIndexInputStream implements Closeable,LegacyBitFileStructure
 {
 	/** the lexicon input stream providing the offsets */
-	protected final LexiconInputStream lis;
+	protected final Iterator<Map.Entry<?,? extends BitIndexPointer>> lis;
 	/** The gamma compressed file containing the terms. */
 	protected BitIn file; 
 	/** filename of the underlying bitfile */
@@ -52,21 +53,10 @@
 	/** Indicates whether field information is used.*/
 	final boolean useFieldInformation = FieldScore.USE_FIELD_INFORMATION;
 	
-	public InvertedIndexInputStream(String path, String prefix, LexiconInputStream lis) throws IOException
-	{
-		this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.IFSUFFIX, lis);
-	}
-	
-	public InvertedIndexInputStream(String filename, LexiconInputStream lis) throws IOException
+	public InvertedIndexInputStream(Index _index, String structureName, Iterator<Map.Entry<?, ? extends BitIndexPointer>> positions) throws IOException
 	{
-		file = new BitInputStream(this.filename = filename);
-		this.lis = lis;
-	}
-
-	public InvertedIndexInputStream(BitIn invFile, LexiconInputStream lis) throws IOException
-	{
-		file = invFile;
-		this.lis = lis;
+		file = new BitInputStream(this.filename = _index.getPath() + "/" + _index.getPrefix() +"."+ structureName +".bf"); // keep the name in the filename field, as the removed constructor did; otherwise the field stays null
+		this.lis = positions;
 	}
 
 	/** forces the data structure to reopen the underlying bitfile
@@ -80,15 +70,17 @@
 	}
 	
 	public int[][] getNextDocuments() throws IOException {
-		int rtrLis = lis.readNextEntry();
-		if (rtrLis < 0)
+		if (! lis.hasNext())
 			return null;
-		return getNextDocuments(lis.getNt(), lis.getEndOffset(), lis.getEndBitOffset());
+		
+		return getNextDocuments(lis.next().getValue());
 	}
 	
-	protected int[][] getNextDocuments(int df, long endByteOffset, byte endBitOffset) throws IOException {
+	protected int[][] getNextDocuments(BitIndexPointer pointer) throws IOException {
 		int[][] documentTerms = null;
 		final int fieldCount = FieldScore.FIELDS_COUNT;
+		System.out.println("term"+ ((EntryStatistics)pointer).getTermId() + " has Nt="+pointer.getNumberOfEntries() );
+		final int df = pointer.getNumberOfEntries();
 		if (useFieldInformation) { //if there are tag information to process			
 			documentTerms = new int[3][df];
 			documentTerms[0][0] = file.readGamma() - 1;
@@ -118,7 +110,7 @@
 		try{
 		while((documents = getNextDocuments()) != null)
 		{
-			System.out.print("tid"+i);
+			System.out.print((i++)+"th term: ");
 			if (useFieldInformation) {
 				for (int j = 0; j < documents[0].length; j++) {
 					System.out.print("(" + documents[0][j] + ", " + documents[1][j]
@@ -140,6 +132,7 @@
 	public void close()
 	{
 		try{ file.close(); } catch (IOException ioe) {}
-		lis.close();
+		if (lis instanceof Closeable)
+			((Closeable)lis).close();
 	}
 }
Index: src/uk/ac/gla/terrier/structures/maps/FSOrderedMapFile.java
===================================================================
--- src/uk/ac/gla/terrier/structures/maps/FSOrderedMapFile.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/maps/FSOrderedMapFile.java	(revision 0)
@@ -0,0 +1,690 @@
+package uk.ac.gla.terrier.structures.maps;
+
+import java.io.Closeable;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.AbstractCollection;
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.log4j.Logger;
+
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
+import uk.ac.gla.terrier.structures.seralization.WriteableFactory;
+import uk.ac.gla.terrier.utility.Files;
+import uk.ac.gla.terrier.utility.io.RandomDataInput;
+import uk.ac.gla.terrier.utility.io.RandomDataOutput;
+
+/** An implementation of java.util.Map that can be accessed from disk.
+ * Key and value types are assumed to have a fixed size. Their factories
+ * must be passed to the constructor. In the name, FSOrderedMapFile,
+ * FS stands for Fixed Size.
+ * @author Craig Macdonald
+ * @since 3.0
+ * @param <K> Type of the keys
+ * @param <V> Type of the values
+ */
+public class FSOrderedMapFile<
+        K extends WritableComparable,
+        V extends Writable
+        > 
+    implements OrderedMap<K,V>, Closeable
+{
+    /** The logger used for this class */
+	protected static final Logger logger = Logger.getLogger(FSOrderedMapFile.class);
+    /** Supplies the index range that getEntry binary-searches for a key: element 0 of the result becomes the initial low bound, element 1 the initial high bound. */
+    public interface FSOMapFileBSearchShortcut<KEY>
+    {
+        public int[] searchBounds(KEY key) throws IOException;
+    }
+    /** A map entry that also reports an int index; MapFileEntry, the implementation in this file, returns the index it was constructed with. */
+    interface OrderedMapEntry<K,V> extends Entry<K,V>
+    {
+        public int getIndex();
+    }
+
+    /** Fallback shortcut that makes getEntry binary-search the whole file. */
+    class DefaultMapFileBSearchShortcut<KEY> implements FSOMapFileBSearchShortcut<KEY>
+    {
+        // The bounds are built per call: this object is created by a field initialiser, i.e. before the constructor has set numberOfEntries.
+        public int[] searchBounds(KEY key)
+        {
+            // getEntry never reads the record at the low bound itself, so -1 (not 0) is needed for entry 0 to be reachable
+            return new int[]{-1, numberOfEntries};
+        }
+    }
+    
+    /** Sequential iterator over the key/value entries of a map file, read one after another from a DataInput. */
+    public static class EntryIterator<IK extends Writable,IV extends Writable> 
+    	implements Iterator<Entry<IK,IV>>, java.io.Closeable
+    {
+        protected DataInput di;
+        protected int numEntries;
+        protected int counter = 0;
+        protected WriteableFactory<IK> keyFactory;
+        protected WriteableFactory<IV> valueFactory;
+        /** Opens filename as a stream; the entry count is the file length divided by the fixed key+value size. */
+        public EntryIterator(String filename, FixedSizeWriteableFactory<IK> _keyFactory, FixedSizeWriteableFactory<IV> _valueFactory)
+            throws IOException
+        {
+            this(
+                new DataInputStream(Files.openFileStream(filename)),
+                (int)(Files.length(filename)/( _keyFactory.getSize() + _valueFactory.getSize() )),
+                _keyFactory, 
+                _valueFactory
+                );
+        }
+        /** Iterates over _numEntries entries read from _di; keys and values are instantiated through the factories. */
+        EntryIterator(DataInput _di, int _numEntries, WriteableFactory<IK> _keyFactory, WriteableFactory<IV> _valueFactory)
+        {
+            di = _di;
+            numEntries = _numEntries;
+            this.keyFactory = _keyFactory;
+            this.valueFactory = _valueFactory;
+        }
+        /** Closes the underlying input if it is Closeable, and does nothing otherwise. */
+        public void close() throws IOException
+        {
+            // guarded like next(): di is only declared as DataInput, so an unconditional cast can throw ClassCastException
+            if (di instanceof Closeable)
+                ((Closeable)di).close();
+        }
+        /** @return true while fewer than numEntries entries have been read */
+        public boolean hasNext()
+        {
+            return counter < numEntries;
+        }
+        /** Reads and returns the next entry; the input is closed automatically after the last one.
+          * @throws NoSuchElementException if all entries have been read or the read fails */
+        public Entry<IK,IV> next()
+        {
+            if (counter >= numEntries)
+            {
+                throw new NoSuchElementException();
+            }
+            IK key = keyFactory.newInstance();
+            IV value = valueFactory.newInstance();
+            try{
+                key.readFields(di);
+                value.readFields(di);
+                counter++;
+            } catch (IOException ioe) {
+                logger.error("IOException while iterating", ioe); 
+                throw new NoSuchElementException("IOException while iterating");
+            }
+            // best effort: a failure to close after the final entry must not lose that entry
+            if ((counter == numEntries) && di instanceof Closeable)
+                try{
+                    ((Closeable)di).close();
+                } catch (IOException ioe) {}
+            return new MapFileEntry<IK,IV>(key,value,counter-1);
+        }
+        /** Not supported. */
+        public void remove() { throw new UnsupportedOperationException();}
+    }
+    
+    /** an iterator over the values only; every key is read into a scratch object and discarded. */
+    class valueIterator implements Iterator<V>
+    {
+        DataInput di;
+        int numEntries;
+        int count = 0;
+        K uselessKey;
+        /** Iterates over _numEntries entries read from _di. */
+        valueIterator(DataInput _di, int _numEntries)
+        {
+            di = _di;
+            numEntries = _numEntries;
+            uselessKey = keyFactory.newInstance();
+        }
+        /** @return true while fewer than numEntries entries have been read */
+        public boolean hasNext()
+        {
+            return count < numEntries;
+        }
+        /** Skips the next key and returns its value; the input is closed after the last entry if it is Closeable. */
+        public V next()
+        {
+            if (count++ >= numEntries)
+                throw new NoSuchElementException();
+            V value = valueFactory.newInstance();
+            try{
+                uselessKey.readFields(di); // consumes the key bytes so that di is positioned at the value
+                value.readFields(di);
+            } catch (IOException ioe) {
+                logger.error("IOException while iterating", ioe); 
+                throw new NoSuchElementException("IOException while iterating");
+            }
+            if ((count == numEntries) && di instanceof Closeable)
+                try{
+                    ((Closeable)di).close();
+                } catch (IOException ioe) {}
+            return value;
+        }
+        /** Not supported. */
+        public void remove() { throw new UnsupportedOperationException();}
+    }
+    
+    /** an iterator over the keys only; every value is read into a scratch object and discarded. */
+    class keyIterator implements Iterator<K>, Closeable
+    {
+        DataInput di;
+        int numEntries;
+        int count = 0;
+        V uselessValue;
+        /** Iterates over _numEntries entries read from _di. */
+        keyIterator(DataInput _di, int _numEntries)
+        {
+            di = _di;
+            numEntries = _numEntries;
+            uselessValue = valueFactory.newInstance();
+        }
+        /** @return true while fewer than numEntries entries have been read */
+        public boolean hasNext()
+        {
+            return count < numEntries;
+        }
+        /** Returns the next key and skips its value; the input is closed after the last entry if it is Closeable. */
+        public K next()
+        {
+            if (count++ >= numEntries)
+                throw new NoSuchElementException();
+            K key = keyFactory.newInstance();
+            try{
+                key.readFields(di);
+                uselessValue.readFields(di); // consumes the value bytes so that di is positioned at the next key
+            } catch (IOException ioe) {
+                logger.error("IOException while iterating", ioe); 
+                throw new NoSuchElementException("IOException while iterating");
+            }
+            if ((count == numEntries) && di instanceof Closeable)
+                try{
+                    ((Closeable)di).close();
+                } catch (IOException ioe) {}
+            return key;
+        }
+        /** Not supported. */
+        public void remove() { throw new UnsupportedOperationException();}
+        public void close() // closes di if it is Closeable; an IOException from that close is ignored
+        {
+        	if (di instanceof Closeable)
+        		try{
+                    ((Closeable)di).close();
+                } catch (IOException ioe) {}
+        }
+    }
+
+    /** Entry-set view of the enclosing map; iterating it streams the data file from its start. */
+    class MapFileEntrySet extends AbstractSet<Entry<K,V>>
+    {
+        public boolean add(Map.Entry<K,V> e)
+        {
+            put(e.getKey(), e.getValue()); // the enclosing map's put()
+            return true;
+        }
+        /** @return the number of entries in the enclosing map */
+        public int size()
+        {
+            return numberOfEntries;
+        }
+        /** @return true if the enclosing map has no entries */
+        public boolean isEmpty()
+        {
+            return size() == 0;
+        }
+        /** @return an iterator over all entries, or null (after logging) if the data file cannot be opened */
+        public Iterator<Map.Entry<K,V>> iterator()
+        {
+            try{
+              return new EntryIterator<K,V>(
+                    new DataInputStream(Files.openFileStream(dataFilename)),
+                    numberOfEntries,
+                    keyFactory,
+                    valueFactory
+                    );
+            } catch (IOException ioe) {
+                logger.error("Problem reading FSOrderedMapFile "+dataFilename+" as stream", ioe);
+                return null;
+            }
+        }
+        /** The elements of this set are entries: true iff o is a Map.Entry whose key is mapped to a value equal to its value. */
+        public boolean contains(Object o)
+        {
+            if (!(o instanceof Map.Entry))
+                return false;
+            final V stored = get(((Map.Entry<?,?>)o).getKey());
+            return stored != null && stored.equals(((Map.Entry<?,?>)o).getValue());
+        }
+        /** Delegates to the enclosing map; unqualified, the call would bind to the inherited AbstractCollection.remove(Object). */
+        public boolean remove(Map.Entry<K,V> e)
+        {
+            FSOrderedMapFile.this.remove(e.getKey());
+            return true;
+        }
+        /** Removes all entries from the enclosing map. */
+        public void clear()
+        {
+            _clear();
+        }
+    }
+    /** Key-set view of the enclosing map; iterating it streams the data file from its start. */
+    class MapFileKeySet extends AbstractSet<K>
+    {
+        public int size()
+        {
+            return numberOfEntries;
+        }
+        /** @return true if the enclosing map has no entries */
+        public boolean isEmpty()
+        {
+            return size() == 0;
+        }
+        /** @return an iterator over all keys, or null (after logging) if the data file cannot be opened */
+        public Iterator<K> iterator()
+        {
+            try{
+              return new keyIterator(
+                    new DataInputStream(Files.openFileStream(dataFilename)),
+                    numberOfEntries
+                    );
+            } catch (IOException ioe) {
+                logger.error("Problem reading FSOrderedMapFile "+dataFilename+" as stream", ioe);
+                return null;
+            }
+        }
+        /** Looks the key up in the enclosing map.
+          * @return true iff the lookup finds an entry for o */
+        @SuppressWarnings("unchecked")
+        public boolean contains(Object o)
+        {
+            K key = (K)o;
+            return get(key) != null;
+        }
+    }
+    
+    
+    /** A key/value pair together with the index it was created with. */
+    static class MapFileEntry<EK,EV> implements OrderedMapEntry<EK,EV>
+    {
+        EK key;
+        EV value;
+        int index;
+        MapFileEntry(EK _key, EV _value, int _index)
+        {
+            this.key = _key;
+            this.value = _value;
+            this.index = _index;
+        }
+        /** @return the key of this entry */
+        public EK getKey()
+        {
+            return key;
+        }
+        /** @return the index given to the constructor */
+        public int getIndex()
+        {
+            return index;
+        }
+        /** @return the value of this entry */
+        public EV getValue()
+        {
+            return value;
+        }
+        /** Currently a no-op: the argument is ignored, nothing is written and null is returned. */
+        public EV setValue(EV value)
+        {
+            // writing through to the file is not implemented yet, see the TODO below
+            //TODO why does this cause exception?
+            //put(this.key, value);
+            return null;
+        }
+        /** @return "Entry&lt;key,value&gt;"; a null key or value is printed as "null" */
+        public String toString()
+        {
+            return "Entry<"+key + ","+value+">";
+        }
+        /** Equality as specified by Map.Entry: both keys and both values must be equal. */
+        @Override
+        public boolean equals(Object o)
+        {
+            // also covers null; the former unchecked cast threw for null and for anything that is not an entry
+            if (!(o instanceof Map.Entry))
+                return false;
+            final Map.Entry<?,?> e2 = (Map.Entry<?,?>)o;
+            return (key==null ? e2.getKey()==null : key.equals(e2.getKey()))  &&
+                (value==null ? e2.getValue()==null : value.equals(e2.getValue()));
+        }
+        /** Hash code as specified by Map.Entry: the XOR of the key and value hash codes, with null counting as 0. */
+        public int hashCode()
+        {
+            return 
+                (getKey()==null   ? 0 : getKey().hashCode()) ^
+                (getValue()==null ? 0 : getValue().hashCode());
+        }
+    }
+    /** Collection view of the values of the enclosing map; iterating it streams the data file from its start. */
+    class MapFileValueCollection
+        extends AbstractCollection<V>
+        implements Collection<V>
+    {
+        public int size()
+        {
+            return numberOfEntries;
+        }
+        /** @return an iterator over all values, or null (after logging) if the data file cannot be opened */
+        public Iterator<V> iterator()
+        {
+            try{
+            return new valueIterator(
+                new DataInputStream(Files.openFileStream(dataFilename)),
+                    numberOfEntries);
+            } catch (IOException ioe) {
+                logger.error("Problem reading FSOrderedMapFile "+dataFilename+" as stream", ioe);
+                return null;
+            }
+        }
+    }
+
+    /** actual underlying data file */
+	protected RandomDataInput dataFile = null;
+	/** filename of the underlying file */
+	protected String dataFilename;
+	
+	/** The number of entries in the file.*/
+	protected int numberOfEntries;
+	/** total size of one key,value pair */
+	protected int entrySize;
+	
+	protected FSOMapFileBSearchShortcut<K> shortcut = new DefaultMapFileBSearchShortcut<K>();
+	
+	protected FixedSizeWriteableFactory<K> keyFactory;
+	protected FixedSizeWriteableFactory<V> valueFactory;
+	/** Returns dataFile viewed as a RandomDataOutput; throws UnsupportedOperationException if dataFile is not one. */
+	protected RandomDataOutput write()
+	{
+	   if (! (dataFile instanceof RandomDataOutput))
+	       throw new UnsupportedOperationException();
+	   return (RandomDataOutput)dataFile;
+	}
+	/** Computes the entry count of a map file without opening it: file length / (key size + value size), truncated to int. */
+	public static int numberOfEntries(
+			String filename, 
+			FixedSizeWriteableFactory<?> _keyFactory,
+            FixedSizeWriteableFactory<?> _valueFactory)
+	{
+		long length = Files.length(filename);
+		long entrySize = _keyFactory.getSize() + _valueFactory.getSize();
+		return (int)(length/entrySize);
+	}
+	
+	/** Construct a new object to access the underlying file data structure
+	 * 
+	 * @param filename Filename of the file containing the structure
+	 * @param updateable Whether the file can be updated in this JVM
+	 * @param _keyFactory factory object for keys
+	 * @param _valueFactory factory object for values
+	 * @throws IOException thrown if an IO problem occurs
+	 */
+    public FSOrderedMapFile(
+            String filename,
+            boolean updateable,
+            FixedSizeWriteableFactory<K> _keyFactory,
+            FixedSizeWriteableFactory<V> _valueFactory)
+        throws IOException
+    {
+        this.dataFile = updateable
+            ? Files.writeFileRandom(this.dataFilename = filename)
+            : Files.openFileRandom(this.dataFilename = filename);
+        this.keyFactory = _keyFactory;
+        this.valueFactory = _valueFactory;
+        this.entrySize = _keyFactory.getSize() + _valueFactory.getSize();
+        // the entry count is derived from the file length (integer division by the fixed entry size)
+        this.numberOfEntries = (int) (dataFile.length() / (long)entrySize);    
+    }
+    /** @return the key factory given to the constructor */
+    public WriteableFactory<K> getKeyFactory() {
+    	return this.keyFactory;
+    }
+    /** @return the value factory given to the constructor */
+    public WriteableFactory<V> getValueFactory() {
+    	return this.valueFactory;
+    }
+    
+    /** Remove all entries from this map */
+    public void clear()
+    {
+        _clear();
+    }
+    
+    /** Truncates the data file to length 0; an IOException is logged as a warning and leaves numberOfEntries unchanged. */
+    //renamed so that inner classes can access
+    protected void _clear()
+    {
+        RandomDataOutput _dataFile = write();
+        try{
+            _dataFile.setLength(0);
+            numberOfEntries = 0;
+        } catch (IOException ioe) {
+            logger.warn("Could not clear FSOrderedMapFile", ioe);
+        }
+    }
+    /** @return a new MapFileEntrySet view of this map */
+    public Set<Entry<K,V>> entrySet()
+    {
+        return new MapFileEntrySet();
+    }
+    /** @return a new MapFileKeySet view of this map */
+    public Set<K> keySet()
+    {
+        return new MapFileKeySet();
+    }
+    /** @return a new MapFileValueCollection view of this map */
+    public Collection<V> values()
+    {
+        return new MapFileValueCollection();
+    }
+    
+    /** Returns the number of entries in this map */
+    public int size()
+    {
+        return numberOfEntries;
+    }
+    /** Not supported: always throws UnsupportedOperationException. */
+    public boolean containsValue(Object o)
+    {
+        throw new UnsupportedOperationException();
+    }
+    /** @return true iff the lookup of o, cast to the key type, finds an entry */
+    @SuppressWarnings("unchecked")
+	public boolean containsKey(Object o)
+    {
+        return getEntry((K)o) != null;
+    }
+    /** @return true if this map has no entries */
+    public boolean isEmpty()
+    {
+        return numberOfEntries == 0;
+    }
+    /** Not implemented: always throws UnsupportedOperationException. */
+    public V put(K key, V value)
+    {
+        //RandomDataOutput _dataFile = write();
+        throw new UnsupportedOperationException();
+    }
+    /** Not implemented: always throws UnsupportedOperationException. */
+    public V remove(Object _key)
+    {
+        //K key = (K)_key;
+        //RandomDataOutput _dataFile = write();
+        throw new UnsupportedOperationException();
+    }
+    /** Replaces the shortcut that getEntry asks for its initial binary-search bounds. */
+    public void setBSearchShortcut(FSOMapFileBSearchShortcut<K> _shortcut)
+    {
+        this.shortcut = _shortcut;
+    }
+    
+    /** Does the actual disk lookup of an entry by binary search over the fixed-size records.
+      * <p>The two values obtained from the shortcut are used as an exclusive lower bound and
+      * an upper bound: the record at index <tt>low</tt> itself is never read, and an upper
+      * bound equal to numberOfEntries means "one past the last record". Searching the whole
+      * file therefore needs the bounds -1 and numberOfEntries.
+      * @param key the key to look up
+      * @return the entry for key, or null if there is none or an IOException occurred
+      */
+    protected Entry<K,V> getEntry(K key)
+    {
+        int[] bounds;
+        try{
+            bounds = shortcut.searchBounds(key);
+        } catch (IOException ioe) {
+            logger.warn("Could not obtain search bounds, searching the whole FSOrderedMapFile", ioe);
+            // -1 and not 0: the lower bound is exclusive, so 0 would make
+            // the first entry of the file impossible to find
+            bounds = new int[]{-1, numberOfEntries};
+        }
+        int low = bounds[0];
+        int high = bounds[1];
+
+        final K testKey = keyFactory.newInstance();
+        final V value = valueFactory.newInstance();
+
+        try{
+            while (high-low>1) 
+            {
+                // unsigned shift rather than /2: a sum above Integer.MAX_VALUE
+                // would otherwise wrap around to a negative index
+                final int i = (high + low) >>> 1;
+                dataFile.seek((long)i * entrySize);
+                testKey.readFields(dataFile);
+                final int compareEntry = key.compareTo(testKey);
+                if (compareEntry < 0)
+                    high = i;
+                else if (compareEntry > 0)
+                    low = i;
+                else 
+                {
+                    //the value follows its key directly: read it and return the pair
+                    value.readFields(dataFile);
+                    return new MapFileEntry<K,V>(testKey, value, i);
+                }
+            }
+
+            // nothing left to test when the upper bound never moved below the end of the file
+            if (high == numberOfEntries)
+                return null;
+
+            // the loop has narrowed the range down to one candidate, the
+            // record at index high, which has not been compared yet
+            dataFile.seek((long)high * entrySize);
+            testKey.readFields(dataFile);
+            value.readFields(dataFile);
+
+            if (key.compareTo(testKey) == 0) {
+                return new MapFileEntry<K,V>(testKey, value, high);
+            }
+        } catch (IOException ioe) {
+            logger.error("IOException reading FSOrderedMapFile", ioe);
+        }
+        return null;
+    }
+    /** @return the value stored for _key (cast to the key type), or null if the lookup finds no entry */
+    @SuppressWarnings("unchecked")
+	public V get(Object _key)
+    {
+        K key = (K)_key;
+        Map.Entry<K,V> entry = getEntry(key);
+        if (entry == null)
+            return null;
+        // getEntry returns null for a missing key, which is handled above
+        return entry.getValue();
+    }
+    /** Returns the entry stored at position entryNumber. @throws NoSuchElementException if entryNumber is outside [0, size()) or the read fails */
+    public Entry<K,V> get(int entryNumber)
+    {
+        // >= and not >: valid positions are 0..numberOfEntries-1, so numberOfEntries itself is already past the end of the file
+        if (entryNumber < 0 || entryNumber >= numberOfEntries)
+            throw new NoSuchElementException("No entry "+entryNumber+" in a FSOrderedMapFile of "+numberOfEntries+" entries");
+        final K key = keyFactory.newInstance();
+        final V value = valueFactory.newInstance();
+        try{
+            dataFile.seek((long)entryNumber * entrySize);
+            key.readFields(dataFile);
+            value.readFields(dataFile);
+        } catch (IOException ioe) {
+            throw new NoSuchElementException(
+                "IOException reading FSOrderedMapFile: "+ioe);
+        }
+        return new MapFileEntry<K,V>(key, value, entryNumber);
+    }
+    /** Calls put() for every entry of m. */
+    public void putAll(Map<? extends K,? extends V> m)
+    {
+    	for (Map.Entry<? extends K, ? extends V> e : m.entrySet())
+    		put(e.getKey(), e.getValue()); 
+    }
+    /** Closes the underlying data file. */
+    public void close() throws IOException
+    {
+        dataFile.close();
+    }
+    
+    /** writes an entire FSOrderedMapFile at once, to the specified filename,
+      * using the entries supplied by the iterator of the specified Iterable
+      */
+    public static void mapFileWrite(String filename,
+            Iterable<Entry<WritableComparable, Writable>> t) throws IOException
+    {
+        mapFileWrite(filename, t.iterator());
+    }
+    /** Writes the key and then the value of every entry supplied by ti to filename; the file is closed even if a write fails. */
+    public static void mapFileWrite(String filename,
+            Iterator<Entry<WritableComparable, Writable>> ti)
+        throws IOException
+    {
+        final DataOutputStream out = new DataOutputStream(Files.writeFileStream(filename));
+        try {
+            while (ti.hasNext()) {
+                final Entry<WritableComparable, Writable> e = ti.next();
+                e.getKey().write(out);
+                e.getValue().write(out);
+            }
+        } finally { out.close(); }
+    }
+    
+    /** returns a utility class which can be used to write a FSOrderedMapFile */
+    public static MapFileWriter mapFileWrite(final String filename)
+        throws IOException
+    {
+        return new MapFileWriter(){
+            DataOutputStream out = new DataOutputStream(Files.writeFileStream(filename)); // opened as soon as the writer is created
+            public void write(WritableComparable key, Writable value)
+                throws IOException
+            {
+            	// NOTE(review): getEntry binary-searches the file, so keys presumably must arrive in ascending order - confirm
+                key.write(out);
+                // the value is stored directly after its key
+                value.write(out);
+            }
+            
+            public void close() throws IOException
+            {
+                out.close();
+            }
+        };
+    }
+    /** Incremental writer for a map file: one write() call per key/value pair, then close(). */
+    public interface MapFileWriter extends Closeable
+    {
+        public void write(WritableComparable key, Writable value)
+            throws IOException;
+    }
+}
Index: src/uk/ac/gla/terrier/structures/maps/OrderedMap.java
===================================================================
--- src/uk/ac/gla/terrier/structures/maps/OrderedMap.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/maps/OrderedMap.java	(revision 0)
@@ -0,0 +1,8 @@
+package uk.ac.gla.terrier.structures.maps;
+import java.util.Map;
+
+public interface OrderedMap<K,V> extends Map<K,V>
+{
+		    /** Return the entry at the specified index */
+		    public Map.Entry<K,V> get(int index);  
+}
Index: src/uk/ac/gla/terrier/structures/BasicLexiconEntry.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BasicLexiconEntry.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/BasicLexiconEntry.java	(revision 0)
@@ -0,0 +1,252 @@
+/*
+ * Terrier - Terabyte Retriever 
+ * Webpage: http://ir.dcs.gla.ac.uk/terrier 
+ * Contact: terrier{a.}dcs.gla.ac.uk
+ * University of Glasgow - Department of Computing Science
+ * http://www.gla.ac.uk/
+ * 
+ * The contents of this file are subject to the Mozilla Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is BlockDirectIndex.java.
+ *
+ * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
+ * All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> (original author)
+ *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
+ */
+package uk.ac.gla.terrier.structures;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
+
+/** Contains all the information about one entry in the Lexicon. 
+  * Created to make thread-safe lookups in the Lexicon easier. */
+public class BasicLexiconEntry extends LexiconEntry {
+	
+	public static class Factory implements FixedSizeWriteableFactory<LexiconEntry>
+	{	
+		public int getSize() {
+			//System.err.println("Value size is"+((3*4) + 8 + 1));
+			return (3*4) + 8 + 1;
+		}
+		public LexiconEntry newInstance() {
+			return new BasicLexiconEntry();
+		}
+		
+		public static class Tester
+		{
+			@Test public void testBasic() throws Exception
+			{
+				LexiconEntry le = new BasicLexiconEntry();
+				//term id
+				le.setTermId(1);
+				assertEquals(le.getTermId(), 1);
+				//position
+				le.setPosition(0, (byte)0);
+				assertEquals(le.getBytes(), 0);
+				assertEquals(le.getBits(), (byte)0);
+				
+				le = new BasicLexiconEntry(2, 2, 5);
+				assertEquals(le.getDocumentFrequency(), 2);
+				assertEquals(le.getFrequency(), 5);
+				le.add(new BasicLexiconEntry(0,1,10));
+				assertEquals(le.getDocumentFrequency(), 3);
+				assertEquals(le.getFrequency(), 15);
+			}
+			
+			@Test public void testWritable() throws Exception
+			{
+				Factory f = new Factory();
+				LexiconEntry le = f.newInstance();
+				le.setTermId(100);
+				le.setPosition(10, (byte)11);
+				byte[] b = getBytes(le);
+				System.err.println("le written in "+b.length+" bytes");
+				assertEquals(b.length, f.getSize());
+				LexiconEntry leReader = f.newInstance();
+				populateEntry(leReader, b);
+				assertEquals(le.getTermId(), leReader.getTermId());
+				assertEquals(le.getFrequency(), leReader.getFrequency());
+				assertEquals(le.getDocumentFrequency(), leReader.getDocumentFrequency());
+				assertEquals(le.getBytes(), leReader.getBytes());
+				assertEquals(le.getBits(), leReader.getBits());
+			}
+			
+			@Test public void testWritableFile() throws Exception
+			{
+				Factory f = new Factory();
+				LexiconEntry le = f.newInstance();
+				le.setTermId(100);
+				le.setPosition(10, (byte)11);
+				DataOutputStream dos = new DataOutputStream(new FileOutputStream("/tmp/testFile"));
+				le.write(dos);
+				dos.close();
+				LexiconEntry leReader = f.newInstance();
+				leReader.readFields(new DataInputStream(new FileInputStream("/tmp/testFile")));
+				assertEquals(le.getTermId(), leReader.getTermId());
+				assertEquals(le.getFrequency(), leReader.getFrequency());
+				assertEquals(le.getDocumentFrequency(), leReader.getDocumentFrequency());
+				assertEquals(le.getBytes(), leReader.getBytes());
+				assertEquals(le.getBits(), leReader.getBits());
+				new java.io.File("/tmp/testFile").delete();
+			}
+			
+			static void populateEntry(LexiconEntry le, byte[] b) throws Exception
+			{
+				le.readFields(new DataInputStream(new ByteArrayInputStream(b)));
+			}
+			
+            static byte[] getBytes(Writable w) throws Exception
+    		{
+    			ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    			DataOutputStream dos = new DataOutputStream(buffer);
+    			w.write(dos);
+    			return buffer.toByteArray();
+    		}
+		}
+	}
+	
+	/** the termid of this entry */
+	public int termId;
+	/** the number of documents that this entry occurs in */
+	public int n_t;
+	/** the total number of occurrences of the term in the index */
+	public int TF;
+	/** the start offset of the entry in the inverted index */
+	public long startOffset;
+	/** the start bit offset of the entry in the inverted index */
+	public byte startBitOffset;
+
+	/** Create an empty LexiconEntry */
+	public BasicLexiconEntry(){}
+
+	/** Create a lexicon entry with the following information.
+	  * @param tid the term id
+	  * @param n_t the number of documents the term occurs in (document frequency)
+	  * @param TF the total count of term t in the collection
+	  */
+	public BasicLexiconEntry(int tid, int n_t, int TF)
+	{
+		this.termId = tid;
+		this.n_t = n_t;
+		this.TF = TF;
+	}
+	
+	public BasicLexiconEntry(int tid, int n_t, int TF, long _startOffset, byte _startBitOffset) {
+		this.termId = tid;
+		this.n_t = n_t;
+		this.TF = TF;
+		this.startOffset = _startOffset;
+		this.startBitOffset = _startBitOffset;
+	}
+	
+	public BasicLexiconEntry(int tid, int n_t, int TF, BitFilePosition offset) {
+		this.termId = tid;
+		this.n_t = n_t;
+		this.TF = TF;
+		this.startOffset = offset.getBytes();
+		this.startBitOffset = offset.getBits();
+	}
+	
+	public void setStatistics(int n_t, int TF)
+	{
+		this.n_t = n_t;
+		this.TF = TF;
+	}
+
+	/** increment this lexicon entry by another */
+	public void add(EntryStatistics le)
+	{
+		this.n_t += le.getDocumentFrequency();
+		this.TF  += le.getFrequency();
+	}
+
+	/** alter this lexicon entry to subtract another lexicon entry */
+	public void subtract(EntryStatistics le)
+	{
+		this.n_t -= le.getDocumentFrequency();
+		this.TF  -= le.getFrequency();
+	}
+
+	
+	/** returns a string representation of this lexicon entry */	
+	public String toString() {
+		return "term"+ termId + " Nt=" + n_t + " TF=" + TF 
+			+ " @{" + startOffset + " " + startBitOffset+"}";
+	}
+
+	public int getDocumentFrequency() {
+		return n_t;
+	}
+
+	public int getFrequency() {
+		return TF;
+	}
+
+	public int getTermId() {
+		return termId;
+	}
+
+	public int getNumberOfEntries() {
+		return n_t;
+	}
+
+	public byte getBits() {
+		return startBitOffset;
+	}
+
+	public long getBytes() {
+		return startOffset;
+	}
+	
+	public void setTermId(int newTermId)
+	{
+		termId = newTermId;
+	}
+	
+	public void setPosition(long bytes, byte bits)
+	{
+		startOffset = bytes;
+		startBitOffset = bits;
+	}
+
+	public void readFields(DataInput in) throws IOException {
+		termId = in.readInt();
+		TF = in.readInt();
+		n_t = in.readInt();
+		startOffset = in.readLong();
+		startBitOffset = in.readByte();
+	}
+
+	public void write(DataOutput out) throws IOException {
+		out.writeInt(termId);
+		out.writeInt(TF);
+		out.writeInt(n_t);
+		out.writeLong(startOffset);
+		out.writeByte(startBitOffset);
+	}
+}
Index: src/uk/ac/gla/terrier/structures/IndexUtil.java
===================================================================
--- src/uk/ac/gla/terrier/structures/IndexUtil.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/IndexUtil.java	(revision 0)
@@ -0,0 +1,39 @@
+package uk.ac.gla.terrier.structures;
+
+import java.io.IOException;
+import uk.ac.gla.terrier.utility.Files;
+
+public class IndexUtil {
+
+	/** Move an index from one location to another: each file named srcPrefix.* in srcPath becomes dstPrefix.* in dstPath */
+	public static void renameIndex(String srcPath, String srcPrefix, String dstPath, String dstPrefix)
+		throws IOException
+	{
+		final String actualPrefix = srcPrefix +'.';
+		for (String filename : Files.list(srcPath))
+		{
+			//prefixes are swapped literally: replaceFirst() would read srcPrefix as a regex and dstPrefix as a replacement pattern
+			if (filename.startsWith(actualPrefix))
+			{
+				final String newFilename = dstPrefix + filename.substring(srcPrefix.length());
+				Files.rename(srcPath + "/" + filename, dstPath+"/"+ newFilename);
+			}
+		} 
+	}
+	
+	/** Delete an existing index: every file in path whose name starts with prefix followed by '.' is deleted */
+	public static void deleteIndex(String path, String prefix)
+		throws IOException
+	{
+		final String actualPrefix = prefix +'.';
+		for (String filename : Files.list(path))
+		{
+			if (filename.startsWith(actualPrefix))
+			{
+				//the trailing '.' in actualPrefix keeps files of a longer-named prefix from matching
+				Files.delete(path + "/" + filename);
+			}
+		}
+	}
+
+}
Index: src/uk/ac/gla/terrier/structures/LexiconOutputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/LexiconOutputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/LexiconOutputStream.java	(working copy)
@@ -25,40 +25,14 @@
  */
 package uk.ac.gla.terrier.structures;
 import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.File;
 import java.io.IOException;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.Files;
 /**
  * This class implements an output stream for the lexicon structure.
- * @author Vassilis Plachouras
+ * @author Vassilis Plachouras &amp; Craig Macdonald
  * @version $Revision: 1.29 $
  */
-public class LexiconOutputStream implements Closeable {
-	/** The logger used */
-	private static Logger logger = Logger.getRootLogger();
-	/** A zero buffer for writing to the file.*/
-	protected final byte[] zeroBuffer =
-		new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-	/** The term represented as an array of bytes.*/
-	protected final byte[] termCharacters =
-		new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-	/** The term represented as a string.*/
-	protected String term;
-	/** An integer representing the id of the term.*/
-	protected int termId;
-	/** The document frequency of the term.*/
-	protected int documentFrequency;
-	/** The term frequency of the term.*/
-	protected int termFrequency;
-	/** The offset in bytes in the inverted file of the term.*/
-	protected long endOffset;
-	/** The offset in bits in the starting byte in the inverted file.*/
-	protected byte endBitOffset;
+public abstract class LexiconOutputStream<KEY> implements Closeable {
+	
 	/** A data input stream to read from the bufferInput.*/
 	protected DataOutput lexiconStream = null;
 	/** Pointer written - the sum of the Nts */
@@ -67,57 +41,7 @@
 	protected long numTokensWritten = 0;
 	protected int numTermsWritten = 0;
 
-	 /** A constructor for child classes that doesnt open the file */
-	protected LexiconOutputStream(long a, long b, long c) { }
-
-	/**
-	 * A default constructor.
-	 */
-	public LexiconOutputStream() {
-		try {
-			lexiconStream = new DataOutputStream(Files.writeFileStream(ApplicationSetup.LEXICON_FILENAME));
-		} catch (IOException ioe) {
-			logger.fatal(
-				"I/O error occured while opening the lexicon file. Stack trace follows.",ioe);
-		}
-	}
-	/** Create a lexicon using the specified data stream */
-	public LexiconOutputStream(DataOutput out){
-		lexiconStream = out;
-	}
-	
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public LexiconOutputStream(String filename) {
-		try {
-			lexiconStream = new DataOutputStream(Files.writeFileStream(filename));
-		} catch (IOException ioe) {
-			logger.fatal(
-				"I/O error occured while opening the lexicon file. Stack trace follows.",ioe);
-		}
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param file java.io.File the name of the lexicon file.
-	 */
-	public LexiconOutputStream(File file) {
-		try {
-			lexiconStream = new DataOutputStream(Files.writeFileStream(file));
-		} catch (IOException ioe) {
-			logger.fatal(
-				"I/O error occured while opening the lexicon file. Stack trace follows.",ioe);
-		}
-	}
-
-	/** A constructor for a LexiconOutputStream given the index path and prefix
-	  * @param path String the path to the index
-	  * @param prefix String the prefix of the filenames in the index
-	  */
-	public LexiconOutputStream(String path, String prefix) {
-		this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-	}
+	protected LexiconOutputStream() { }
 
 
 	/**
@@ -134,75 +58,16 @@
 	 * Writes a lexicon entry.
 	 * @return the number of bytes written to the file. 
 	 * @throws java.io.IOException if an I/O error occurs
-	 * @param _term the string representation of the term
-	 * @param _termId the terms integer identifier
-	 * @param _documentFrequency the term's document frequency in the collection
-	 * @param _termFrequency the term's frequency in the collection
-	 * @param _endOffset the term's ending byte offset in the inverted file
-	 * @param _endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	public int writeNextEntry(
-		String _term,
-		int _termId,
-		int _documentFrequency,
-		int _termFrequency,
-		long _endOffset,
-		byte _endBitOffset)
-		throws IOException {
-		byte[] tmpBytes = _term.getBytes();
-		final int length = tmpBytes.length;
-		numPointersWritten += _documentFrequency;
-		numTokensWritten += _termFrequency;
-		numTermsWritten++;
-		lexiconStream.write(tmpBytes, 0, length);
-		/* if an ArrayIndexOutOfBoundsException ocurrs here
-		 * this means that the term is longer than STRING_BYTE_LENGTH */
-		lexiconStream.write(
-			zeroBuffer,
-			0,
-			ApplicationSetup.STRING_BYTE_LENGTH - length);
-		lexiconStream.writeInt(_termId);
-		lexiconStream.writeInt(_documentFrequency);
-		lexiconStream.writeInt(_termFrequency);
-		lexiconStream.writeLong(_endOffset);
-		lexiconStream.writeByte(_endBitOffset);
-		return Lexicon.lexiconEntryLength;
-	}
-	/**
-	 * Writes a lexicon entry.
-	 * @return the number of bytes written.
-	 * @throws java.io.IOException if an I/O error occurs
-	 * @param _term the byte[] representation of the term. Using this format means that
-	 * the term does not have to be decoded and recoded every time.
-	 * @param _termId the terms integer identifier
-	 * @param _documentFrequency the term's document frequency in the collection
-	 * @param _termFrequency the term's frequency in the collection
-	 * @param _endOffset the term's ending byte offset in the inverted file
-	 * @param _endBitOffset the term's ending byte bit-offset in the inverted file
+	 * @param _key the key - usually the term
+	 * @param _value the lexicon entry value
 	 */
-	public int writeNextEntry(
-		byte[] _term,
-		int _termId,
-		int _documentFrequency,
-		int _termFrequency,
-		long _endOffset,
-		byte _endBitOffset)
-		throws IOException {
-		final int length = _term.length;
-		numPointersWritten += _documentFrequency;
-		numTokensWritten += _termFrequency;
+	public abstract int writeNextEntry(KEY _key, LexiconEntry _value) throws IOException;
+	
+	protected void incrementCounters(EntryStatistics t)
+	{
 		numTermsWritten++;
-		lexiconStream.write(_term, 0, _term.length);
-		lexiconStream.write(
-			zeroBuffer,
-			0,
-		   	ApplicationSetup.STRING_BYTE_LENGTH - length);
-		lexiconStream.writeInt(_termId);
-		lexiconStream.writeInt(_documentFrequency);
-		lexiconStream.writeInt(_termFrequency);
-		lexiconStream.writeLong(_endOffset);
-		lexiconStream.writeByte(_endBitOffset);
-		return Lexicon.lexiconEntryLength;
+		numPointersWritten += t.getDocumentFrequency();
+		numTokensWritten += t.getFrequency();
 	}
 
 	/** Returns the number of pointers there would be in an inverted index built using this lexicon (thus far).
@@ -224,56 +89,4 @@
 	{
 		return numTermsWritten;
 	}
-
-	/**
-	 * Sets the bit offset in the last byte of the term's entry in the inverted file.
-	 * @param _endBitOffset byte the bit offset in the last byte of the 
-	 *		term's entry in the inverted file.
-	 * @deprecated
-	 */
-	public void setEndBitOffset(byte _endBitOffset) {
-		endBitOffset = _endBitOffset;
-	}
-	/**
-	 * Sets the ending offset of the term's entry in the inverted file.
-	 * @param _endOffset long The ending byte of the term's 
-	 *		entry in the inverted file.
-	 * @deprecated
-	 */
-	public void setEndOffset(long _endOffset) {
-		endOffset = _endOffset;
-	}
-	/**
-	 * Sets the document frequency for the given term.
-	 * @param _Nt int The document frequency for the given term.
-	 * @deprecated
-	 */
-	public void setNt(int _Nt) {
-		documentFrequency = _Nt;
-	}
-	/**
-	 * Sets the string representation of the term.
-	 * @param _term java.lang.String The string representation of 
-	 *		the seeked term.
-	 * @deprecated
-	 */
-	public void setTerm(String _term) {
-		term = _term;
-	}
-	/**
-	 * Sets the term's id.
-	 * @param _termId int the term's identifier.
-	 * @deprecated
-	 */
-	public void setTermId(int _termId) {
-		termId = _termId;
-	}
-	/**
-	 * Sets the term frequency for the already found term.
-	 * @param _termFrequency int The term frequency in the collection.
- 	 * @deprecated
-	 */
-	public void setTF(int _termFrequency) {
-		termFrequency = _termFrequency;
-	}
 }
Index: src/uk/ac/gla/terrier/structures/FSOMapFileLexicon.java
===================================================================
--- src/uk/ac/gla/terrier/structures/FSOMapFileLexicon.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/FSOMapFileLexicon.java	(revision 0)
@@ -0,0 +1,346 @@
+package uk.ac.gla.terrier.structures;
+import gnu.trove.TIntObjectHashMap;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.io.Text;
+
+import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
+import uk.ac.gla.terrier.structures.maps.FSOrderedMapFile;
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
+import uk.ac.gla.terrier.utility.Files;
+import uk.ac.gla.terrier.utility.io.RandomDataInput;
+/** Instance of a Lexicon where a FSOrderedMapFile is always used as a backing store.
+ * @author Craig Macdonald
+ * @since 3.0 */
+public class FSOMapFileLexicon extends MapLexicon
+{
+	static final String MAPFILE_EXT = ".fsomapfile";
+	static final String ID_EXT = ".fsomapid";
+	static final String HASH_EXT = ".fsomaphash";
+	
+	static class CharMapBSearchShortcut implements FSOrderedMapFile.FSOMapFileBSearchShortcut<Text>
+	{
+		final TIntObjectHashMap<int[]> map;
+		final int[] defaultReturn;
+		@SuppressWarnings("unchecked")
+		public CharMapBSearchShortcut(String path, String prefix, String structureName, int size) throws Exception
+		{
+			ObjectInputStream ois = new ObjectInputStream(Files.openFileStream(constructFilename(structureName, path, prefix, HASH_EXT)));
+			map = (TIntObjectHashMap<int[]>)ois.readObject();
+			ois.close();
+			defaultReturn = new int[]{0,size};
+		}
+		
+		public int[] searchBounds(Text key) throws IOException {
+			int[] boundaries = map.get(key.charAt(0));
+			if (boundaries == null)
+				return defaultReturn;
+			return boundaries;
+		}	
+	}
+	
+    static class OnDiskLookup implements Id2EntryIndexLookup, java.io.Closeable
+    {
+        final RandomDataInput lexIdFile;
+        protected static final long SIZE_OF_INT = 4;
+        public OnDiskLookup(String path, String prefix, String structureName) throws IOException
+        {
+            lexIdFile = Files.openFileRandom(
+            		constructFilename(structureName, path, prefix, ID_EXT));
+        }
+        
+        public int getIndex(int termid) throws IOException
+        {
+            lexIdFile.seek(SIZE_OF_INT * (long)termid);
+            return lexIdFile.readInt();
+        }
+        
+        public void close() throws IOException
+        {
+            lexIdFile.close();
+        }
+    }
+           
+    static class InMemoryLookup implements Id2EntryIndexLookup
+    {
+        protected final int[] id2index;
+        public InMemoryLookup(String path, String prefix, String structureName, int size) 
+            throws IOException
+        {
+            DataInputStream lexIdFile = new DataInputStream(Files.openFileStream(
+            		constructFilename(structureName, path, prefix, ID_EXT)));
+            id2index = new int[size];
+            for(int i=0;i<size;i++)
+            {
+                id2index[i] = lexIdFile.readInt();
+            }
+            lexIdFile.close();
+        }
+        
+        public int getIndex(int termid)
+        {
+            return id2index[termid];
+        }
+    }
+    
+    /** Construct a new FSOMapFileLexicon */
+    @SuppressWarnings("unchecked")
+	public FSOMapFileLexicon(String structureName, Index index) throws IOException
+    {
+    	this(
+    		structureName, 
+    		index.getPath(), 
+    		index.getPrefix(), 
+    		(FixedSizeWriteableFactory<Text>)index.getIndexStructure(structureName+"-keyfactory"),
+    		(FixedSizeWriteableFactory<LexiconEntry>)index.getIndexStructure(structureName+"-valuefactory"),
+    		index.getIndexProperty("index."+structureName+".termids", "aligned"),
+    		index.getIndexProperty("index."+structureName+".bsearchshortcut", "default")
+    		);
+    }
+    
+    /** Opens the lexicon backed by the file path/prefix.structureName.fsomapfile. termIdLookup must be one of
+     * aligned, file, fileinmem or disabled; termLookup must be charmap or default, else an IOException is thrown. */
+    public FSOMapFileLexicon(String structureName, String path, String prefix, 
+    		FixedSizeWriteableFactory<Text> keyFactory,
+    		FixedSizeWriteableFactory<LexiconEntry> valueFactory,
+    		String termIdLookup, String termLookup) throws IOException
+    {
+    	super(new FSOrderedMapFile<Text,LexiconEntry>(
+                	constructFilename(structureName, path, prefix, MAPFILE_EXT),
+                    false,
+                    keyFactory,
+                    valueFactory));
+    	this.keyFactory = keyFactory;
+    	if (termIdLookup.equals("aligned"))
+        {
+            setTermIdLookup(new IdIsIndex());
+        }
+        else if (termIdLookup.equals("file"))
+        {
+            setTermIdLookup(new OnDiskLookup(path, prefix, structureName));
+        }
+        else if (termIdLookup.equals("fileinmem"))
+        {
+            setTermIdLookup(new InMemoryLookup(path, prefix, structureName, this.map.size()));
+        }
+        else if (termIdLookup.equals("disabled"))
+        {
+        	setTermIdLookup(null);
+        }
+        else
+        {
+            throw new IOException("Unrecognised value ("+termIdLookup+") for termIdlookup for structure "+structureName);
+        }
+    	//optional shortcut that narrows the binary search range using the first character of the term
+    	if (termLookup.equals("charmap"))
+    	{
+    		try{
+    			((FSOrderedMapFile<Text,LexiconEntry>)this.map).setBSearchShortcut(
+    				new CharMapBSearchShortcut(path, prefix, structureName, this.map.size()));
+    		} catch (Exception e) {
+    			throw (IOException) new IOException("Problem loading FSOMapFileBSearchShortcut for "+structureName+": "+ e.getMessage()).initCause(e);
+    		}
+    	}
+    	else if (termLookup.equals("default"))
+    	{
+    		//no shortcut installed: lookups search the whole map file
+    	}
+    	else
+    	{
+    		throw new IOException("Unrecognised value ("+termLookup+") for termLookup for structure "+structureName);
+    	}
+    }
+
+	@Override
+	public void close() {
+		super.close();
+	}
+	
+	public static class MapFileLexiconIterator 
+		implements Iterator<Entry<String, LexiconEntry>>, Closeable
+	{
+		protected Iterator<Entry<Text, LexiconEntry>> parent;
+		
+		@SuppressWarnings("unchecked")
+		public MapFileLexiconIterator(String structureName, Index index) throws IOException
+		{
+			this(
+				structureName, 
+	    		index.getPath(), 
+	    		index.getPrefix(), 
+	    		(FixedSizeWriteableFactory<Text>)index.getIndexStructure(structureName+"-keyfactory"),
+	    		(FixedSizeWriteableFactory<LexiconEntry>)index.getIndexStructure(structureName+"-valuefactory"));
+		}
+		
+		public MapFileLexiconIterator(String structureName, String path, String prefix, 
+	    		FixedSizeWriteableFactory<Text> keyFactory,
+	    		FixedSizeWriteableFactory<LexiconEntry> valueFactory) throws IOException
+		{
+			this(constructFilename(structureName, path, prefix, MAPFILE_EXT), keyFactory, valueFactory);
+		}
+		
+		public MapFileLexiconIterator(String filename, FixedSizeWriteableFactory<Text> keyFactory,
+	    		FixedSizeWriteableFactory<LexiconEntry> valueFactory) throws IOException
+	    {
+			this(new FSOrderedMapFile.EntryIterator<Text, LexiconEntry>(filename, keyFactory, valueFactory));
+	    }
+		
+		public MapFileLexiconIterator(Iterator<Entry<Text, LexiconEntry>> _parent)
+		{
+			parent = _parent;
+		}
+		public boolean hasNext() {
+			return parent.hasNext();
+		}
+		public Entry<String, LexiconEntry> next() {
+			return MapLexicon.toStringEntry(parent.next());
+		}
+		public void remove() {
+			parent.remove();
+		}
+		
+		public void close() {
+			if (parent instanceof Closeable)
+				((Closeable)parent).close();
+		}
+	}
+
+	public Iterator<Entry<String, LexiconEntry>> iterator() {
+		return new MapFileLexiconIterator(this.map.entrySet().iterator());
+	}
+	
+	/** Does two things to a FSOMapFileLexicon: adds the termid lookup file (if required),
+	 * and also creates the lexicon hash file. Every entry read is also passed to statsCounter.
+	 * @param structureName - name of the index structure that this FSOMapFileLexicon represents
+	 * @param index - the index that the structure belongs to
+	 * @param statsCounter - counter whose count() is called with each lexicon entry during the scan
+	 * @throws IOException if an IO problem occurs */
+	@SuppressWarnings("unchecked")
+	public static void optimise(String structureName, Index index, LexiconBuilder.CollectionStaticticsCounter<LexiconEntry> statsCounter) throws IOException
+	{
+		final String mapFileFilename = constructFilename(structureName, index.getPath(), index.getPrefix(), MAPFILE_EXT);
+		final FixedSizeWriteableFactory<Text> keyFactory = 
+			(FixedSizeWriteableFactory<Text>)index.getIndexStructure(structureName+"-keyfactory");
+		final FixedSizeWriteableFactory<LexiconEntry> valueFactory = 
+			(FixedSizeWriteableFactory<LexiconEntry>)index.getIndexStructure(structureName+"-valuefactory");
+		final int numEntries = FSOrderedMapFile.numberOfEntries(mapFileFilename, keyFactory, valueFactory);
+		
+		//term id lookups
+		boolean termIdsAligned = true;
+		int[] termid2index = new int[numEntries];
+		int counter= 0; int lastTermId = -1;
+		
+		//bsearch reduction
+		int previousFirstChar = -1;
+		int firstChar = 0;
+		final TIntObjectHashMap<int[]> map = new TIntObjectHashMap<int[]>();
+		
+		Iterator<Map.Entry<Text,LexiconEntry>> iterator = 
+			new FSOrderedMapFile.EntryIterator<Text, LexiconEntry>(mapFileFilename, keyFactory, valueFactory);
+		while(iterator.hasNext())
+		{
+			Map.Entry<Text,LexiconEntry> lee = iterator.next();
+			//NOTE(review): the array write below assumes every termid is < numEntries - confirm
+			
+			//term id
+			int termId = lee.getValue().getTermId();
+			if (! (termId == lastTermId+1))
+				termIdsAligned = false;
+			termid2index[termId] = counter;
+			lastTermId = termId;
+			
+			//bsearch reduction
+			firstChar = lee.getKey().charAt(0);
+			if (firstChar!=previousFirstChar) {
+				int[] boundaries = new int[] {counter, 0};
+				map.put(firstChar, boundaries);
+				previousFirstChar = firstChar;
+			}
+			
+			//increments
+			statsCounter.count(lee.getValue());
+			counter++;
+		}
+		if (iterator instanceof Closeable)
+			((Closeable)iterator).close();
+		
+		//deal with termids
+		if (termIdsAligned)
+		{
+			index.setIndexProperty("index."+structureName+".termids", "aligned");
+			System.err.println("All ids for structure "+structureName+ " are aligned, skipping "
+				+ID_EXT+ " file");
+		}
+		else
+		{
+			DataOutputStream dos = new DataOutputStream(Files.writeFileStream(
+					constructFilename(structureName, index.getPath(), index.getPrefix(), ID_EXT)));
+			for(int indexof : termid2index)
+				dos.writeInt(indexof);
+			dos.close();
+			index.setIndexProperty("index."+structureName+".termids", (numEntries > 15000000) ? "file" : "fileinmem");
+		}
+		
+		
+		int[] mapKeys = map.keys();
+		Arrays.sort(mapKeys);
+		final int mapKeysSize = mapKeys.length;
+		for (int i=0; i<mapKeysSize-1; i++) {
+			int nextLowerBoundary = (map.get(mapKeys[i+1]))[0];
+			int[] currentBoundaries = map.get(mapKeys[i]);
+			currentBoundaries[1] = nextLowerBoundary;
+			map.put(mapKeys[i], currentBoundaries);
+		}
+		//the last character's range ends at the number of entries read; an empty lexicon has no ranges to close
+		if (mapKeysSize > 0)
+		{
+			map.get(mapKeys[mapKeysSize-1])[1] = counter;
+		}
+		
+		final ObjectOutputStream oos = new ObjectOutputStream(Files.writeFileStream(
+				constructFilename(structureName, index.getPath(), index.getPrefix(), HASH_EXT)));
+		oos.writeObject(map);
+		oos.close();
+		index.setIndexProperty("index."+structureName+".bsearchshortcut", "charmap");
+		index.flush();
+	}
+	
+	static String constructFilename(String structureName, String path, String prefix, String extension)
+	{
+		return path 
+	        + "/"+ prefix 
+	        +"." + structureName + extension;
+	}
+	
+	/** Rename a FSOMapFileLexicon within the specified index location */
+	public static void renameMapFileLexicon(
+			String SrcStructureName, String SrcPath, String SrcPrefix,
+			String destStructureName, String destPath, String destPrefix)
+	{
+		for(String extension : new String[]{HASH_EXT, ID_EXT, MAPFILE_EXT })
+		{
+			Files.rename(
+					constructFilename(SrcStructureName, SrcPath, SrcPrefix, extension),
+					constructFilename(destStructureName, destPath, destPrefix, extension)
+				);
+		}
+	}
+	
+	/** Delete a FSOMapFileLexicon within the specified index location */
+	public static void deleteMapFileLexicon(String structureName, String path, String prefix)
+	{
+		for(String extension : new String[]{HASH_EXT, ID_EXT, MAPFILE_EXT })
+		{
+			Files.delete(constructFilename(structureName, path, prefix, extension));
+		}
+	}
+}
Index: src/uk/ac/gla/terrier/structures/BlockEntryStatistics.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockEntryStatistics.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/BlockEntryStatistics.java	(revision 0)
@@ -0,0 +1,7 @@
+package uk.ac.gla.terrier.structures;
+
+public interface BlockEntryStatistics extends EntryStatistics {
+	/** The number of blocks that this term has. 
+	 * Needed by the BlockInvertedIndexBuilder. */
+	public int getBlockCount();
+}
Index: src/uk/ac/gla/terrier/structures/UTFLexiconOutputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/UTFLexiconOutputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/UTFLexiconOutputStream.java	(working copy)
@@ -1,146 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is LexiconOutputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> (original author) 
- *   Craig Macdonald <craigm{a.}.dcs.gla.ac.uk>
- */
-package uk.ac.gla.terrier.structures;
-import java.io.DataOutput;
-import java.io.File;
-import java.io.IOException;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.StringTools;
-/**
- * This class implements an output stream for the lexicon structure.
- * @author Vassilis Plachouras, Craig Macdonald
- * @version $Revision: 1.12 $
- */
-public class UTFLexiconOutputStream extends LexiconOutputStream {
-	/** A zero buffer for writing to the file.*/
-	private byte[] zeroBuffer = new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-
-	/**
-	 * A default constructor.
-	 */
-	public UTFLexiconOutputStream() {
-		super();
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public UTFLexiconOutputStream(String filename) {
-		super(filename);
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param file java.io.File the name of the lexicon file.
-	 */
-	public UTFLexiconOutputStream(File file) {
-		super(file);
-	}
-	
-	public UTFLexiconOutputStream(String path, String prefix)
-	{
-		super(path, prefix);
-	}
-	
-	/** Create a lexicon using the specified data stream */
-	public UTFLexiconOutputStream(DataOutput out){
-		super(out);
-	}
-
-	/**
-	 * Writes a lexicon entry.
-	 * @return the number of bytes written to the file. 
-	 * @throws java.io.IOException if an I/O error occurs
-	 * @param _term the string representation of the term
-	 * @param _termId the terms integer identifier
-	 * @param _documentFrequency the term's document frequency in the collection
-	 * @param _termFrequency the term's frequency in the collection
-	 * @param _endOffset the term's ending byte offset in the inverted file
-	 * @param _endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	public int writeNextEntry(
-		String _term,
-		int _termId,
-		int _documentFrequency,
-		int _termFrequency,
-		long _endOffset,
-		byte _endBitOffset)
-		throws IOException {
-		numPointersWritten += _documentFrequency;
-        numTokensWritten += _termFrequency;
-        numTermsWritten++;
-		lexiconStream.writeUTF(_term);
-		lexiconStream.write(
-				zeroBuffer,
-				0,
-				ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(_term));
-		lexiconStream.writeInt(_termId);
-		lexiconStream.writeInt(_documentFrequency);
-		lexiconStream.writeInt(_termFrequency);
-		lexiconStream.writeLong(_endOffset);
-		lexiconStream.writeByte(_endBitOffset);
-		return UTFLexicon.lexiconEntryLength;
-	}
-	/**
-	 * Writes a lexicon entry.
-	 * @return the number of bytes written.
-	 * @throws java.io.IOException if an I/O error occurs
-	 * @param _term the byte representation of the term, as written by DataInput.writeUTF(). This
-	 * should be ApplicationSetup.STRING_BYTE_LENGTH +2 in length
-	 * @param _termId the terms integer identifier
-	 * @param _documentFrequency the term's document frequency in the collection
-	 * @param _termFrequency the term's frequency in the collection
-	 * @param _endOffset the term's ending byte offset in the inverted file
-	 * @param _endBitOffset the term's ending byte bit-offset in the inverted file
-	 */
-	
-	public int writeNextEntry(
-		byte[] _term,
-		int _termId,
-		int _documentFrequency,
-		int _termFrequency,
-		long _endOffset,
-		byte _endBitOffset)
-		throws IOException {
-		final int length = _term.length;
-		numPointersWritten += _documentFrequency;
-        numTokensWritten += _termFrequency;
-		numTermsWritten++;
-		lexiconStream.write(_term, 0, length);
-		lexiconStream.write(
-            zeroBuffer,
-       	    0,
-			2+ApplicationSetup.STRING_BYTE_LENGTH - length);	
-		lexiconStream.writeInt(_termId);
-		lexiconStream.writeInt(_documentFrequency);
-		lexiconStream.writeInt(_termFrequency);
-		lexiconStream.writeLong(_endOffset);
-		lexiconStream.writeByte(_endBitOffset);
-		return UTFLexicon.lexiconEntryLength;
-	}
-
-}
Index: src/uk/ac/gla/terrier/structures/Lexicon.java
===================================================================
--- src/uk/ac/gla/terrier/structures/Lexicon.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/Lexicon.java	(working copy)
@@ -1,654 +1,61 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is Lexicon.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Gianni Amati <gba{a.}fub.it> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
 package uk.ac.gla.terrier.structures;
-import gnu.trove.TIntObjectHashMap;
-
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.util.Iterator;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.Files;
-import uk.ac.gla.terrier.utility.io.RandomDataInput;
-import uk.ac.gla.terrier.utility.io.RandomDataOutput;
-
-import org.apache.log4j.Logger;
-/**
- * The class that implements the lexicon structure. Apart from the lexicon file,
- * which contains the actual data about the terms, and takes its name from
- * ApplicationSetup.LEXICON_FILENAME, another file is created and
- * used, containing a mapping from the term's code to the offset of the term 
- * in the lexicon. The name of this file is given by 
- * ApplicationSetup.LEXICON_INDEX_FILENAME.
- * 
- * @see ApplicationSetup#LEXICON_FILENAME
- * @see ApplicationSetup#LEXICON_INDEX_FILENAME
- * @author Gianni Amati, Vassilis Plachouras
- * @version $Revision: 1.47 $
- */
-public class Lexicon implements Iterable<String>, Closeable{
-	/** The logger used for the Lexicon */
-	protected Logger logger = Logger.getRootLogger();
-	
-	/** The term represented as an array of bytes.*/
-	protected byte[] termCharacters;
-	
-	/** The term represented as a string.*/
-	protected String term;
-	
-	/** An integer representing the id of the term.*/
-	protected int termId;
-	
-	/** The document frequency of the term.*/
-	protected int documentFrequency;
-	
-	/** The term frequency of the term.*/
-	protected int termFrequency;
-	
-	/** The offset in bytes in the inverted file of the term.*/
-	protected long startOffset;
-	
-	/** The offset in bits in the starting byte in the inverted file.*/
-	protected byte startBitOffset;
-	
-	/** The offset in bytes in the inverted file of the term.*/
-	protected long endOffset;
-	
-	/** The offset in bits in the ending byte in the inverted file.*/
-	protected byte endBitOffset;
-	
-	/** 
-	 * The size in bytes of an entry in the lexicon file.
-	 * An entry corresponds to a string, an int (termCode), 
-	 * an int (docf), an int (tf), a long (the offset of the end 
-	 * of the term's entry in bytes in the inverted file) and
-	 * a byte (the offset in bits of the last byte of the term's entry 
-	 * in the inverted file.
-	 */
-	public static final int lexiconEntryLength =
-		ApplicationSetup.STRING_BYTE_LENGTH //the string representation
-		+12 //the three integers
-		+8 //the long
-		+1; //the byte
-	
-	/** The file containing the mapping from the codes to the offset in the lexicon file.*/
-	protected RandomDataInput idToOffsetFile;
-	
-	/** The actual lexicon file.*/
-	protected RandomDataInput lexiconFile;
-
-	/** Filename of the of lexicon file opened */
-	protected String lexiconFileName;
-	
-	/** The number of entries in the lexicon file.*/
-	protected int numberOfLexiconEntries;
-	
-	/** A buffer for reading from the lexicon file.*/
-	protected byte[] buffer = new byte[512];
-	
-	/** A second buffer for finding terms.*/
-	protected byte[] bt = new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-	
-	/** A byte input stream to read from the buffer.*/
-	protected ByteArrayInputStream bufferInput = new ByteArrayInputStream(buffer);
-	
-	/** A data input stream to read from the bufferInput.*/
-	protected DataInputStream dataInput = new DataInputStream(bufferInput);
-	
-	/** 
-	 * A hashmap that is used in order to reduce the number 
-	 * of random accesses on disk during the binary search
-	 */
-	protected TIntObjectHashMap map = null;
-
-	/** Controls whether to use the hash for speeding up 
-	 * lexicon entry lookups or not. The corresponding
-	 * property is <tt>lexicon.use.hash</tt>.
-	 */
-	protected boolean USE_HASH = Boolean.parseBoolean(ApplicationSetup.getProperty("lexicon.use.hash","true"));
 
-	protected Class inputStreamClass = LexiconInputStream.class;
-	
-	/** Contructor for child classes which dont want to open a file */
-	protected Lexicon(long a, long b, long c) {}
-	
-	/** 
-	 * A default constructor.
-	 */
-	public Lexicon() {
-		this(ApplicationSetup.LEXICON_FILENAME);
-	}
+import java.util.Map;
 
-	public Lexicon(String path, String prefix)
+public abstract class Lexicon<KEY> implements Closeable, Iterable<Map.Entry<KEY,LexiconEntry>>
 	{
-		this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-	}
-	
-	/**
-	 * Constructs an instace of Lexicon and opens
-	 * the corresponding file.
-	 * 
-	 * @param lexiconName the name of the lexicon file.
-	 */
-	public Lexicon(String lexiconName) {
-		boolean updateable = false;
-		try {
-			lexiconFile = updateable
-				? Files.writeFileRandom(this.lexiconFileName = lexiconName)
-				: Files.openFileRandom(this.lexiconFileName = lexiconName);
-			idToOffsetFile = Files.openFileRandom(lexiconName.substring(0,lexiconName.lastIndexOf(".")).concat(ApplicationSetup.LEXICON_INDEX_SUFFIX));
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)lexiconEntryLength);
-			
-			if (USE_HASH) {
-				try{
-					String hashFilename = lexiconName.substring(0,lexiconName.lastIndexOf(".")).concat(ApplicationSetup.LEXICON_HASH_SUFFIX);
-					ObjectInputStream ois = new ObjectInputStream(Files.openFileStream(hashFilename));
-					map = (TIntObjectHashMap)ois.readObject();
-					ois.close();
-				}
-				catch (IOException ioe) {
-					logger.warn("Input/output exception while reading the hashmap used for the lexicon. Hash will not be used." + ioe);
-					USE_HASH = false;
-				} catch (ClassNotFoundException cnfe) {
-					logger.warn("ClassNotFoundException while reading the hashmap used for the lexicon. Hash will not be used." + cnfe);
-					USE_HASH = false;
-				}
-			}//USE_HASH
-		} catch (IOException ioe) {
-			logger.error("Input/output exception while opening for reading the lexicon file: " + ioe);
-		}
-
-	}
-	
-	/**
-	* Closes the lexicon and lexicon index files.
-	*/
-	public void close() {
-		try {
-			idToOffsetFile.close();
-			lexiconFile.close();
-		} catch (IOException ioe) {
-			logger.error("Input/output exception while closing the lexicon file: " + ioe);
-		}
-	}
-	
-	/** 
-	 * Prints out the contents of the lexicon file. 
-	 * Streams are used to read the lexicon file.
-	 */
-	public void print() {
-		LexiconInputStream tmp=null;
-		try{
-			tmp = (LexiconInputStream)inputStreamClass.getConstructor(String.class).newInstance(this.lexiconFileName);
-		} catch (Exception e) {logger.error(e); return;}
-		final LexiconInputStream _lis=tmp;
-		_lis.print();
-	}
-
-	/**
-	 * Finds the term given its term code.
-	 *
-	 * @return true if the term is found, else return false
-	 * @param _termId the term's identifier
-	 */
-	public boolean findTerm(int _termId) {
-		try {
-			idToOffsetFile.seek((long)_termId * 8L);
-			return seekEntry((int) (idToOffsetFile.readLong()/(long)lexiconEntryLength));
-		} catch(IOException ioe) {
-			logger.error("Input/Output exception while reading the lexicon index file for termid "+_termId+": ", ioe);
-		}
-		return false;
-		
-	}
-	/** 
-	 * Performs a binary search in the lexicon
-	 * in order to locate the given term.
-	 * If the term is located, the properties
-	 * termCharacters, documentFrequency,
-	 * termFrequency, startOffset, startBitOffset,
-	 * endOffset and endBitOffset contain the
-	 * values related to the term.
-	 * @param _term The term to search for.
-	 * @return true if the term is found, and false otherwise.
-	 */
-	public boolean findTerm(String _term) {
-		int low = -1;
-		int high = numberOfLexiconEntries;
-		int i;
-		int compareStrings;
-
-		if (USE_HASH) {
-			int firstChar = _term.charAt(0);
-			int[] boundaries = (int[])map.get(firstChar);
-			low = boundaries[0];
-			high = boundaries[1];
-
-		}
-
-		//if (logger.isDebugEnabled()) 
-		//	logger.debug("lexicon hash low high for term " + _term + " are: " + low + " " + high);
-		
-		try {
-			while (high-low>1) {
-				
-				i = (high + low)/2;
-				
-				lexiconFile.seek((long)i * (long)lexiconEntryLength);
-				lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-				term = new String(buffer,0,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-							
-				if ((compareStrings = _term.compareTo(term))< 0)
-					high = i;
-				else if (compareStrings > 0)
-					low = i;
-				else { 
-					seekEntry(i);
-					return true;
-				}
-					
-			
-			}
-		} catch(IOException ioe) {
-			logger.fatal("IOException while binary searching the lexicon: " + ioe);
-		}
-		
-		if (high == numberOfLexiconEntries)
-			return false;
-		
-		seekEntry(high);
-		if (_term.compareTo(term) == 0) 
-			return true; 
-		return false;
-	}
-
-	/**
-	 * Returns the bit offset in the last byte of 
-	 * the term's entry in the inverted file.
-	 * @deprecated
-	 * @return byte the bit offset in the last byte of 
-	 *		 the term's entry in the inverted file
-	 */
-	public byte getEndBitOffset() {
-		return endBitOffset;
-	}
-	/**
-	 * Returns the ending offset of the term's entry in the inverted file.
-	 * @deprecated
-	 * @return long The ending byte of the term's entry in the inverted file.
-	 */
-	public long getEndOffset() {
-		return endOffset;
-	}
-	/**
-	 * Return the document frequency for the given term.
-	 * @deprecated
-	 * @return int The document frequency for the given term
-	 */
-	public int getNt() {
-		return documentFrequency;
-	}
-	/**
-	 * Returns the number of entries in the lexicon.
-	 * @return the number of entries in the lexicon.
-	 * @deprecated
-	 */
-	public long getNumberOfLexiconEntries() {
-		return numberOfLexiconEntries;
-	}
-	/**
-	 * The bit offset in the starting byte of 
-	 * the entry in the inverted file.
-	 * @deprecated
-	 * @return byte The number of bits in the first 
-	 *		 byte of the entry in the inverted file
-	 */
-	public byte getStartBitOffset() {
-		return startBitOffset;
-	}
-	/**
-	 * Returns the beginning of the term's entry in the inverted file.
-	 * @deprecated
-	 * @return long the start offset (in bytes) in the inverted file
-	 */
-	public long getStartOffset() {
-		return startOffset;
-	}
-	/**
-	 * Insert the method's description here.
-	 * @deprecated
-	 * @return java.lang.String The string representation of the seeked term.
-	 */
-	public String getTerm() {
-		return this.term.trim();
-	}
-	/**
-	 * Returns the term's id.
-	 * @deprecated
-	 * @return int the term's id.
-	 */
-	public int getTermId() {
-		return termId;
-	}
-	/**
-	 * Returns the term frequency for the already seeked term.
-	 *
-	 * @return int The term frequency in the collection.
-	 * @deprecated
-	 */
-	public int getTF() {
-		return termFrequency;
-	}
-	/**
-	 * Seeks the i-th entry of the lexicon.
-	 * TODO read a byte array from the file and decode it, 
-	 * 		instead of reading the different pieces of 
-	 *	  information separately.
-	 * @param i The index of the entry we are looking for.
-	 * @return true if the entry was found, false otherwise.
-	 */
-	public boolean seekEntry(int i) {
-		try {
-			if (i >= numberOfLexiconEntries || i < 0)
-				return false;
-			else {
-				if (i == 0) {
-					lexiconFile.seek(0);
-					startOffset = 0;
-					startBitOffset = 0;
-					lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-					dataInput.reset();
-					term = new String(buffer,0,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-				} else {
-					lexiconFile.seek((i-1) * (long)lexiconEntryLength + (long)(ApplicationSetup.STRING_BYTE_LENGTH + 12));
-					lexiconFile.readFully(buffer, 0, lexiconEntryLength + 9);
-					dataInput.reset();
-					startOffset = dataInput.readLong();
-					startBitOffset = dataInput.readByte();
-					if (++startBitOffset == 8) {
-						startBitOffset = 0;
-						startOffset++;
-					}
-					term = new String(buffer, 9, ApplicationSetup.STRING_BYTE_LENGTH).trim();					
-				}
-				dataInput.skipBytes(ApplicationSetup.STRING_BYTE_LENGTH);
-				termId = dataInput.readInt();
-				documentFrequency = dataInput.readInt();
-				termFrequency = dataInput.readInt();
-				endOffset = dataInput.readLong();
-				endBitOffset = dataInput.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.error("Input/Output exception while reading the idToOffset file. ", ioe);
-		}
-		return false;
-	}
-
-	
-	/**
-	 * In an already stored entry in the lexicon
-	 * file, the information about the term frequency,
-	 * the endOffset in bytes, and the endBitOffset in the last
-	 * byte, is updated. The term is specified by the index of the entry.
-	 *
-	 * @return true if the information is updated properly, 
-	 *		 otherwise return false
-	 * @param i the i-th entry
-	 * @param frequency the term's Frequency
-	 * @param endOffset the offset of the ending byte in the inverted file
-	 * @param endBitOffset the offset in bits in the ending byte 
-	 *		in the term's entry in inverted file
-	 * @deprecated The Lexicon class is only used for reading the
-	 *			 lexicon file, and not for writing any information.
-	 */
-	public boolean updateEntry(
-		int i,
-		int frequency,
-		long endOffset,
-		byte endBitOffset) {
-		
-		if (! (lexiconFile instanceof RandomDataOutput))
-			return false;
-		RandomDataOutput _lexiconFile = (RandomDataOutput)lexiconFile;
-		try {
-			long lexiconOffset = (long)i * (long)lexiconEntryLength;
-			//we seek the offset where the frequency should be writen
-			_lexiconFile.seek(
-				lexiconOffset + ApplicationSetup.STRING_BYTE_LENGTH + 8);
-			_lexiconFile.writeInt(frequency);
-			_lexiconFile.writeLong(endOffset);
-			_lexiconFile.writeByte(endBitOffset);
-		} catch (IOException ioe) {
-			logger.error("Input/Output exception while writing to the lexicon file. ", ioe);
-		}
-		return false;
-	}
-
-
-	/** Returns the number of entries in the lexicon file specified by f.
-	  * @param f The file to find the number of entries in
-	  */
-	public static int numberOfEntries(File f) {
-		return (int) ( f.length()/(long)lexiconEntryLength );
-	}
-
-	/** Returns the number of entries in the lexicon file specified by filename.
-	  * @param filename
-	  */
-	public static int numberOfEntries(String filename) {
-		return numberOfEntries(new File(filename));
-	}
-
-	
-	/** Returns a LexiconEntry describing all the information in the lexicon about the ith term 
-	 * in the lexicon.
-	 * @param termNumber The ith term in the lexicon. i is 0-based, and runs to getNumberOfLexiconEntries()-1
-	 * @return LexiconEntry all information about the term's entry in the lexicon. null if termid not found
-	 */
-	public LexiconEntry getIthLexiconEntry(int termNumber) {
-		if (! seekEntry(termNumber))
-			return null;
-		LexiconEntry le = new LexiconEntry();
-		le.termId = this.termId;
-		le.term = this.term.trim();
-		le.TF = this.termFrequency;
-		le.n_t = this.documentFrequency;
-		le.startOffset = this.startOffset;
-		le.startBitOffset = this.startBitOffset;
-		le.endOffset = this.endOffset;
-		le.endBitOffset = this.endBitOffset;
-		return le;
-	}
-	
-	/** Returns a LexiconEntry describing all the information in the lexicon about the term
-	  * denoted by termid
-	  * @param termid the termid of the term of interest
-	  * @return LexiconEntry all information about the term's entry in the lexicon. null if termid not found */
-	public LexiconEntry getLexiconEntry(int termid) {
-		/* TODO: improve this to the effectiveness level of getLexiconEntry() */
-		if (! findTerm(termid))
-			return null;
-		LexiconEntry le = new LexiconEntry();
-		le.termId = this.termId;
-		le.term = this.term.trim();
-		le.TF = this.termFrequency;
-		le.n_t = this.documentFrequency;
-		le.startOffset = this.startOffset;
-		le.startBitOffset = this.startBitOffset;
-		le.endOffset = this.endOffset;
-		le.endBitOffset = this.endBitOffset;
-		return le;
-	}
-	
-	/** Returns a LexiconEntry describing all the information in the lexicon about the term
-	  * denoted by _term
-	  * @param _term the String term that is of interest
-	  * @return LexiconEntry all information about the term's entry in the lexicon. null if termid not found */
-	public LexiconEntry getLexiconEntry(String _term) {
-		int low = -1;
-		int high = numberOfLexiconEntries;
-		int i;
-		int compareStrings;
-		String term;
-		byte[] buffer = new byte[lexiconEntryLength+9]; //to get the start offsets as well
-		
-		if (USE_HASH) {
-			int firstChar = _term.charAt(0);
-			int[] boundaries = (int[])map.get(firstChar);
-			if (boundaries != null)
+    static class LexiconFileEntry<KEY2> implements Map.Entry<KEY2,LexiconEntry>
 			{
-				low = boundaries[0];
-				high = boundaries[1];
-			}
-			//System.out.println("lexicon use hash: " + low + " " + high);
-		}
-		
-		try {
-			while (high-low>1) {
+        KEY2 key; // key of this entry, handed back by getKey()
+        LexiconEntry value; // value of this entry; handed back by getValue(), replaced by setValue()
 				
-				i = (high + low)/2;
-				if (i==0) {
-					lexiconFile.seek(0);
-					lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-					term = new String(buffer,0,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-				} else {
-					lexiconFile.seek((long)i * (long)(lexiconEntryLength)-9L);
-					lexiconFile.readFully(buffer, 0, lexiconEntryLength+9);
-					term = new String(buffer,9,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-				}
-							
-				if ((compareStrings = _term.compareTo(term))< 0)
-					high = i;
-				else if (compareStrings > 0)
-					low = i;
-				else { //read the rest and return the data
-					return getLexiconEntryFromBuffer(buffer, term, i);
-				}
-			}
-		
-			if (high == numberOfLexiconEntries)
-				return null;
-			
-			if (high == 0) {
-				lexiconFile.seek(0);
-				lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-				term = new String(buffer,0,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-			} else {
-				lexiconFile.seek((long)high * (long)(lexiconEntryLength)-9L);
-				lexiconFile.readFully(buffer, 0, lexiconEntryLength+9);
-				term = new String(buffer,9,ApplicationSetup.STRING_BYTE_LENGTH).trim();				
+        public LexiconFileEntry(KEY2 k, LexiconEntry v) // creates an entry pairing key k with value v
+        {
+            this.key = k;
+            this.value = v;
 			}
 			
-			if (_term.compareTo(term) == 0) {
-				return getLexiconEntryFromBuffer(buffer, term, high);
-			}	
-		} catch(IOException ioe) {
-			logger.fatal("IOException while binary searching the lexicon: " + ioe);
-		}
-		return null;
+        public int hashCode()
+        {
+            // XOR of the key's and the value's hash codes, with null counting as 0.
+            final int keyHash = (getKey() == null) ? 0 : getKey().hashCode();
+            return keyHash ^ ((getValue() == null) ? 0 : getValue().hashCode());
 	}
 
-	protected LexiconEntry getLexiconEntryFromBuffer(byte[] buffer, String term, int index) {
-		int offset;
-		LexiconEntry lEntry = new LexiconEntry();
-		lEntry.term = term;
-		if (index==0) {
-			lEntry.startOffset = 0;
-			lEntry.startBitOffset = 0;
-			offset = ApplicationSetup.STRING_BYTE_LENGTH;						
-		} else {
-			offset = 0;
-//			lEntry.startOffset =
-//				(((((((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 |
-//					   buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-
-			long startOffset = (buffer[offset++] & 0xff);
-			for (int j=0; j<7; j++)
-				startOffset = startOffset<<8 | (buffer[offset++] & 0xff);
-			lEntry.startOffset = startOffset;
-
-			
-			lEntry.startBitOffset = (byte)(buffer[offset++]&0xff);
-			if (++lEntry.startBitOffset == 8) {
-				lEntry.startBitOffset = 0;
-				lEntry.startOffset++;
+        public LexiconEntry setValue(LexiconEntry v) // stores v as the new value and returns the value it replaced
+        {
+        	LexiconEntry old = value; // remember the current value so it can be returned
+            value = v;
+            return old;
 			}
 
-			offset += ApplicationSetup.STRING_BYTE_LENGTH;
+        public KEY2 getKey() // returns the key currently stored in this entry (may be null)
+        {
+            return key;
 		}
-		lEntry.termId = 
-			(((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-		lEntry.n_t =
-			(((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-		lEntry.TF =
-			(((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-		
-//		lEntry.endOffset = 
-//			(((((((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 |
-//				   buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
 
-		long endOffset = (int)(buffer[offset++] & 0xff);
-		for (int j=0; j<7; j++)
-			endOffset = endOffset<<8 | (buffer[offset++] & 0xff);
-		lEntry.endOffset = endOffset;
-		
-		lEntry.endBitOffset = (byte)(buffer[offset]&0xff);
-		return lEntry;
+        public LexiconEntry getValue() // returns the value currently stored in this entry (may be null)
+        {
+            return value;    
 	}
 
-	/** Returns an interator that gives every item in the lexicon, in lexical order. Underlying implementation is
-	  * using a lexicon input stream */
-	public Iterator<String> iterator()
+        @SuppressWarnings("unchecked")
+		public boolean equals(Object o)
 	{
-		LexiconInputStream tmp=null;
-		try{
-			tmp = (LexiconInputStream)inputStreamClass.getConstructor(String.class).newInstance(this.lexiconFileName);
-		} catch (Exception e) {logger.error(e);}
-		final LexiconInputStream _lis=tmp;
-		return new Iterator<String>(){
-			LexiconInputStream lis = _lis;
-			 public boolean hasNext(){
-				try{
-					return lis.readNextEntry() != -1;
-				} catch (IOException ioe) {
-					logger.error(ioe);
+            if (! (o instanceof Map.Entry))
 					return false;
-				}
-			}
-			public String next()
-			{
-				return lis.getTerm();
-			}
-			public void remove() { throw new UnsupportedOperationException();}
-		};
+            // Wildcards, not Map.Entry<String,LexiconEntry>: the other entry's key/value types are unknown here.
+            final Map.Entry<?,?> other = (Map.Entry<?,?>) o;
+            final boolean sameKey = (getKey() == null)
+              ? other.getKey() == null : getKey().equals(other.getKey());
+            return sameKey && ((getValue() == null)
+              ? other.getValue() == null : getValue().equals(other.getValue()));
 	}
 }
 
+    public abstract int numberOfEntries(); // presumably the number of entries in this lexicon - confirm with implementations
+    public abstract LexiconEntry getLexiconEntry(KEY term); // looks up an entry by its key (the term)
+    public abstract Map.Entry<KEY,LexiconEntry> getLexiconEntry(int termid); // looks up by term id; the result carries the key and the entry
+    public abstract Map.Entry<KEY,LexiconEntry> getIthLexiconEntry(int index); // looks up by position; NOTE(review): confirm whether index is 0-based
+    public abstract void close(); // presumably releases resources held by the lexicon; details are left to implementations
+}
\ No newline at end of file
Index: src/uk/ac/gla/terrier/structures/UTFBlockLexiconInputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/UTFBlockLexiconInputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/UTFBlockLexiconInputStream.java	(working copy)
@@ -1,200 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is BlockLexiconInputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.DataInput;
-import java.io.EOFException;
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.StringTools;
-/**
- * An input stream for accessing sequentially the entries
- * of a block lexicon.
- * @author Douglas Johnson, Vassilis Plachouras
- * @version $Revision: 1.17 $
- */
-public class UTFBlockLexiconInputStream extends BlockLexiconInputStream {
-	/** The logger used */
-	private static Logger logger = Logger.getRootLogger();
-	/** The term represented as an array of bytes.*/
-	protected byte[] termCharacters = new byte[ApplicationSetup.STRING_BYTE_LENGTH +2];
-	/**
-	 * A default constructor.
-	 */
-	public UTFBlockLexiconInputStream() {
-		super();
-		entrySize = UTFBlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public UTFBlockLexiconInputStream(String filename) {
-		super(filename);
-		entrySize = UTFBlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param file java.io.File the name of the lexicon file.
-	 */
-	public UTFBlockLexiconInputStream(File file) {
-		super(file);
-		entrySize = UTFBlockLexicon.lexiconEntryLength;
-	}
-	
-	/** Read a lexicon from the specified input stream */
-	public UTFBlockLexiconInputStream(DataInput in) {
-		super(in);
-		entrySize = UTFBlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * Read the next lexicon entry.
-	 * @return the number of bytes read if there is no error, 
-	 *		 otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 */
-	public int readNextEntry() throws IOException {
-		try {
-			startBitOffset = (byte) (endBitOffset + 1);
-			startOffset = endOffset;
-			if (startBitOffset == 8) {
-				startOffset = endOffset + 1;
-				startBitOffset = 0;
-			}
-			
-			term = lexiconStream.readUTF();
-			lexiconStream.skipBytes(ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-			
-			termId = lexiconStream.readInt();
-			documentFrequency = lexiconStream.readInt();
-			blockFrequency = lexiconStream.readInt();
-			termFrequency = lexiconStream.readInt();
-			endOffset = lexiconStream.readLong();
-			endBitOffset = lexiconStream.readByte();
-			numPointersRead += documentFrequency;
-			numTokensRead += termFrequency;
-			numTermsRead++;
-			return Lexicon.lexiconEntryLength;
-		} catch (EOFException eofe) {
-			return -1;
-		}
-	}
-	
-	/**
-		* Returns the number of entries in the lexicon file.
-		*/
-		public int numberOfEntries(){
-				return (int)(lexiconFilelength / UTFBlockLexicon.lexiconEntryLength);
-		}
-	
-	/**
-	 * Read the next lexicon entry, where the term is saved as a byte array. No attempt is
-	 * made to parse the byte array and the padding bytes into a String. Use this method when
-	 * you want to get the bytes of the string using getTermCharacters(). This method does
-	 * NOT work with getTerm()
-	 * @return the number of bytes read if there is no error, 
-	 *		 otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 */
-	public int readNextEntryBytes() throws IOException {
-		try {
-			startBitOffset = (byte) (endBitOffset + 1);
-			startOffset = endOffset;
-			if (startBitOffset == 8) {
-				startOffset = endOffset + 1;
-				startBitOffset = 0;
-			}
-
-			Arrays.fill(termCharacters, (byte)0);
-			lexiconStream.readFully(termCharacters, 0, ApplicationSetup.STRING_BYTE_LENGTH +2);
-
-			termId = lexiconStream.readInt();
-			documentFrequency = lexiconStream.readInt();
-			blockFrequency = lexiconStream.readInt();
-			termFrequency = lexiconStream.readInt();
-			endOffset = lexiconStream.readLong();
-			endBitOffset = lexiconStream.readByte();
-			numPointersRead += documentFrequency;
-			numTokensRead += termFrequency;
-			numTermsRead++;
-			return Lexicon.lexiconEntryLength;
-		} catch (EOFException eofe) {
-			return -1;
-		}
-	}
-	
-	/**
-	 * Prints out the contents of the lexicon file to check.
-	 */
-	public void print() {
-		int i = 0; //counter
-		int entryLength = Lexicon.lexiconEntryLength;
-		try {
-			while (readNextEntry() != -1) {
-				System.out.println(
-					""
-						+ (long)i * (long)entryLength
-						+ ", "
-						+ term.trim()
-						+ ", "
-						+ termId
-						+ ", "
-						+ documentFrequency
-						+ ", "
-						+ blockFrequency
-						+ ", "
-						+ termFrequency
-						+ ", "
-						+ endBitOffset);
-				i++;
-			}
-		} catch (IOException ioe) {
-			logger.error("Input/Output exception while reading the lexicon index input stream. ", ioe);
-		}
-	}
-
-	/**
-	 * Returns the string representation of the term.
-	 * @return the string representation of the already found term.
-	 */
-	public String getTerm()
-	{
-		return term;
-	}
-	
-	/** 
-	 * Returns the bytes of the String. Only valid is readNextEntryByte was used.
-	 * @return the byte array holding the term's byte representation
-	 */
-	public byte[] getTermCharacters() {
-		return termCharacters;
-	}
-}
Index: src/uk/ac/gla/terrier/structures/BlockLexiconInputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockLexiconInputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/BlockLexiconInputStream.java	(working copy)
@@ -1,159 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is BlockLexiconInputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.*;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-/**
- * An input stream for accessing sequentially the entries
- * of a block lexicon.
- * @author Douglas Johnson, Vassilis Plachouras
- * @version $Revision: 1.27 $
- */
-public class BlockLexiconInputStream extends LexiconInputStream {
-	/** The logger used */
-	private static Logger logger = Logger.getRootLogger();
-	
-	/** 
-	 * The total number of different blocks a term appears in.
-	 */
-	protected int blockFrequency;
-	/**
-	 * A default constructor.
-	 */
-	public BlockLexiconInputStream() {
-		super();
-		entrySize = BlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public BlockLexiconInputStream(String filename) {
-		super(filename);
-		entrySize = BlockLexicon.lexiconEntryLength;
-	}
-
-	public BlockLexiconInputStream(String path, String prefix) {
-		super(path, prefix);
-		 entrySize = BlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param file java.io.File the name of the lexicon file.
-	 */
-	public BlockLexiconInputStream(File file) {
-		super(file);
-		entrySize = BlockLexicon.lexiconEntryLength;
-	}
-	
-	/** Read a lexicon from the specified input stream */
-	public BlockLexiconInputStream(DataInput in) {
-		super(in);
-		entrySize = BlockLexicon.lexiconEntryLength;
-	}
-	/**
-	 * Read the next lexicon entry.
-	 * @return the number of bytes read if there is no error, 
-	 *		 otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 */
-	public int readNextEntry() throws IOException {
-		try {
-			startBitOffset = (byte) (endBitOffset + 1);
-			startOffset = endOffset;
-			if (startBitOffset == 8) {
-				startOffset = endOffset + 1;
-				startBitOffset = 0;
-			}
-			lexiconStream.readFully(
-				termCharacters,
-				0,
-				ApplicationSetup.STRING_BYTE_LENGTH);
-			
-			termId = lexiconStream.readInt();
-			documentFrequency = lexiconStream.readInt();
-			blockFrequency = lexiconStream.readInt();
-			termFrequency = lexiconStream.readInt();
-			endOffset = lexiconStream.readLong();
-			endBitOffset = lexiconStream.readByte();
-			numPointersRead += documentFrequency;
-			numTokensRead += termFrequency;
-			numTermsRead++;
-			return Lexicon.lexiconEntryLength;
-		} catch (EOFException eofe) {
-			return -1;
-		}
-	}
-	
-	/**
-	   * Returns the number of entries in the lexicon file.
-	   */
-		public int numberOfEntries(){
-				return (int)(lexiconFilelength / BlockLexicon.lexiconEntryLength);
-		}
-	
-	/**
-	 * Prints out the contents of the lexicon file to check.
-	 */
-	public void print() {
-		int i = 0; //counter
-		int entryLength = Lexicon.lexiconEntryLength;
-		try {
-			while (readNextEntry() != -1) {
-				System.out.println(
-					""
-						+ (long)i * (long)entryLength
-						+ ", "
-						+ term.trim()
-						+ ", "
-						+ termId
-						+ ", "
-						+ documentFrequency
-						+ ", "
-						+ blockFrequency
-						+ ", "
-						+ termFrequency
-						+ ", "
-						+ endBitOffset);
-				i++;
-			}
-		} catch (IOException ioe) {
-			logger.error("Input/Output exception while reading the lexicon index input stream. ", ioe);
-		}
-	}
-
-	/**
-	 * Returns the block frequency for the currently processed term.
-	 * @return int The block frequency for the currently processed term
-	 */
-	public int getBlockFrequency() {
-		return blockFrequency;
-	}
-}
Index: src/uk/ac/gla/terrier/structures/EntryStatistics.java
===================================================================
--- src/uk/ac/gla/terrier/structures/EntryStatistics.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/EntryStatistics.java	(revision 0)
@@ -0,0 +1,14 @@
+package uk.ac.gla.terrier.structures;
+/** An interface for basic statistics about a lexical entry (usually a term)
+ * @since 3.0
+ * @author Craig Macdonald
+ */
+public interface EntryStatistics
+{
+	public int getFrequency(); //F: the frequency of the entry (presumably its total number of occurrences -- TODO confirm)
+	public int getDocumentFrequency(); //Nt: the document frequency of the entry, i.e. how many documents it occurs in
+	public int getTermId(); //the integer id of the entry
+	
+	public void add(EntryStatistics e); //NOTE(review): presumably accumulates e's statistics into this object -- confirm in implementations
+    public void subtract(EntryStatistics e); //NOTE(review): presumably the inverse of add(e) -- confirm in implementations
+}
Index: src/uk/ac/gla/terrier/structures/InvertedIndex.java
===================================================================
--- src/uk/ac/gla/terrier/structures/InvertedIndex.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/InvertedIndex.java	(working copy)
@@ -27,14 +27,13 @@
  */
 package uk.ac.gla.terrier.structures;
 import java.io.IOException;
-import java.util.ArrayList;
+
 import org.apache.log4j.Logger;
 
-import uk.ac.gla.terrier.compression.BitFile;
+import uk.ac.gla.terrier.compression.BitFileBuffered;
 import uk.ac.gla.terrier.compression.BitIn;
 import uk.ac.gla.terrier.compression.BitInSeekable;
 import uk.ac.gla.terrier.compression.OldBitFile;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
 /**
  * This class implements the inverted index 
@@ -67,43 +66,29 @@
 	/** Filename of the open file */
 	protected String filename;
 	
-	/**
-	 * The lexicon used for retrieving documents.
-	 */
-	protected Lexicon lexicon;
-
 	/** A constructor for child classes that doesnt open the file */
 	protected InvertedIndex(long a, long b, long c) { }
 
+	
+
 	/** A default constructor, only for use by child classes */
 	protected InvertedIndex()
 	{
 	
 	}
 
-	public InvertedIndex(Lexicon lexicon, String path, String prefix)
+	public InvertedIndex(Index index, String structureName)
 	{
-		this(lexicon, path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.IFSUFFIX);
+		this(index.getPath() + "/" + index.getPrefix() + "." + structureName + ".bf");
 	}
 	
 	/**
-	 * Creates an instance of the HtmlInvertedIndex class using the lexicon.
-	 * @param lexicon The lexicon used for retrieval
-	 */
-	public InvertedIndex(Lexicon lexicon) {
-		this(lexicon, ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
-		//file = new BitFile(ApplicationSetup.INVERTED_FILENAME, "r");
-		//this.lexicon = lexicon;
-	}
-	/**
 	 * Creates an instance of the HtmlInvertedIndex class using the given
 	 * lexicon.
-	 * @param lexicon The lexicon used for retrieval
 	 * @param filename The name of the inverted file
 	 */
-	public InvertedIndex(Lexicon lexicon, String filename) {
-		file = new BitFile(this.filename = filename, "r");
-		this.lexicon = lexicon;
+	public InvertedIndex(String filename) {
+		file = new BitFileBuffered(this.filename = filename);
 	}
 	/** forces the data structure to reopen the underlying bitfile
 	 *  using the legacy implementation of BitFile (OldBitFile)
@@ -115,349 +100,49 @@
 		file = new OldBitFile(filename, "r");
 	}
 	
-	/**
-	 * Prints out the inverted index file.
-	 */
-	public void print() {
-		for (int i = 0; i < lexicon.getNumberOfLexiconEntries(); i++) {
-			int[][] documents = getDocuments(i);
-			System.out.print("tid"+i);
-			if (useFieldInformation) {
-				for (int j = 0; j < documents[0].length; j++) {
-					System.out.print("(" + documents[0][j] + ", " + documents[1][j]
-							+ ", F" + documents[2][j] + ") ");
-				}
-				System.out.println();				
-			} else {
-				for (int j = 0; j < documents[0].length; j++) {
-					System.out.print("(" + documents[0][j] + ", " 
-										 + documents[1][j] + ") ");
-				}
-				System.out.println();
-			}
-		}
-	}
-
-	public int[][] getDocuments(LexiconEntry lEntry) {
-		if (lEntry==null)
-			return null;
-		return getDocuments(lEntry.startOffset, 
-			lEntry.startBitOffset, 
-			lEntry.endOffset, 
-			lEntry.endBitOffset, lEntry.n_t);
-	}
-	/**
-	 * Returns a two dimensional array containing the document ids, term
-	 * frequencies and field scores for the given documents. 	  
-	 * @return int[][] the two dimensional [3][n] array containing the n 
-	 *		 document identifiers, frequencies and field scores. If fields is not enabled, then size is [2][n].
-	 * @param termid the identifier of the term whose documents we are looking for.
-	 */
-	public int[][] getDocuments(int termid) {
-		 LexiconEntry lEntry = lexicon.getLexiconEntry(termid);
-		if (lEntry == null)
-			return null;
-		return getDocuments(lEntry.startOffset,
-			lEntry.startBitOffset,
-			lEntry.endOffset,
-			lEntry.endBitOffset, lEntry.n_t);
+	public void print()
+	{
+		//TODO
+		throw new UnsupportedOperationException("InvIndex.print() is missing");
 	}
 	
-/**
-	 * Returns a two dimensional array containing the document ids, term
-	 * frequencies and field scores for the given documents. 	  
-	 * @return int[][] the two dimensional [3][n] array containing the n 
-	 *		 document identifiers, frequencies and field scores. If fields is not enabled, then size is [2][n].
-	 * @param sOffset start byte of the postings in the inverted file
-	 * @param sBitOffset start bit of the postings in the inverted file
-	 * @param eOffset end byte of the postings in the inverted file
-	 * @param eBitOffset end bit of the postings in the inverted file
-	 */
-	
-	public int[][] getDocuments(long sOffset, byte sBitOffset, long eOffset, byte eBitOffset, int df) {
 		
-		final byte startBitOffset = sBitOffset;
-		final long startOffset = sOffset;
-		final byte endBitOffset = eBitOffset;
-		final long endOffset = eOffset;
+	public int[][] getDocuments(BitIndexPointer pointer) {
+		if (pointer==null)
+			return null;
 		final int fieldCount = FieldScore.FIELDS_COUNT;
 		final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
-		//int df = lexicon.getNt();
+		final int count = pointer.getNumberOfEntries();
 		try{
+			final BitIn file = this.file.readReset(pointer.getBytes(), pointer.getBits());
 			int[][] documentTerms = null;
-			final BitIn file = this.file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);		
 			if (loadTagInformation) { //if there are tag information to process			
-				documentTerms = new int[3][df];
+				documentTerms = new int[3][count];
 				documentTerms[0][0] = file.readGamma() - 1;
 				documentTerms[1][0] = file.readUnary();
 				documentTerms[2][0] = file.readBinary(fieldCount);
-				for (int i = 1; i < df; i++) {					
+				for (int i = 1; i < count; i++) {					
 					documentTerms[0][i]  = file.readGamma() + documentTerms[0][i - 1];
 					documentTerms[1][i]  = file.readUnary();
 					documentTerms[2][i]  = file.readBinary(fieldCount);
 				}				
 			} else { //no tag information to process					
-				documentTerms = new int[2][df];
+				documentTerms = new int[2][count];
 				//new		
 				documentTerms[0][0] = file.readGamma() - 1;
 				documentTerms[1][0] = file.readUnary();
-				for(int i = 1; i < df; i++){							 
+				for(int i = 1; i < count; i++){							 
 					documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
 					documentTerms[1][i] = file.readUnary();
 				}
 			}
+			file.close();
 			return documentTerms;
 		} catch (IOException ioe) {
 			logger.error("Problem reading inverted index", ioe);
 			return null;
 		}
-	}
 	
-	
-//	public int[][] getDocuments(long sOffset, byte sBitOffset, long eOffset, byte eBitOffset) {
-//	
-//		final byte startBitOffset = sBitOffset;
-//		final long startOffset = sOffset;
-//		final byte endBitOffset = eBitOffset;
-//		final long endOffset = eOffset;
-//		final int fieldCount = FieldScore.FIELDS_COUNT;
-//		final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
-//	
-//		/* Coding is done separately for with Fields and without Fields, to keep
-//		 * if's out of loops. */	
-//		
-//		ArrayList temporaryTerms = null; //instantiate when we know roughly how big it should be
-//		int[][] documentTerms = null;
-//		file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);
-//		//boolean hasMore = false;
-//		if (loadTagInformation) { //if there are tag information to process
-//			/* FIELD_LOAD_FACTOR provides a heuristical rough size need for the arraylist. */
-//			/* could probably do a better optimisation by considering the number of fields.*/
-//			//temporaryTerms = new ArrayList((int)((endOffset-startOffset)*FIELD_LOAD_FACTOR));
-//			TIntArrayList temporaryDocids = new TIntArrayList((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-//			TIntArrayList temporaryTFs = new TIntArrayList((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-//			TIntArrayList temporaryFields = new TIntArrayList((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-//			int previousDocid = -1;
-//			
-//			while (((file.getByteOffset() + startOffset) < endOffset)
-//					|| (((file.getByteOffset() + startOffset) == endOffset) && (file
-//							.getBitOffset() < endBitOffset))) {
-//				//read document ID
-//				temporaryDocids.add(previousDocid = file.readGamma() + previousDocid);
-//				//read document frequency
-//				temporaryTFs.add(file.readUnary());
-//				//read fields bitset (fieldCount bits long)
-//				temporaryFields.add(file.readBinary(fieldCount));
-//		
-//				/*int[] tmp = new int[3];
-//				//read documnent ID
-//				tmp[0] = file.readGamma();
-//				//read document frequency
-//				tmp[1] = file.readUnary();
-//				//read fields bitset (fieldCount bits long) 
-//				tmp[2] = file.readBinary(fieldCount);
-//				temporaryTerms.add(tmp);*/
-//			}
-//			final int postingsListSize = temporaryDocids.size();
-//			documentTerms = new int[3][postingsListSize];
-//			temporaryDocids.toNativeArray(documentTerms[0], 0, postingsListSize);
-//			temporaryTFs.toNativeArray(documentTerms[1], 0, postingsListSize);
-//			temporaryFields.toNativeArray(documentTerms[2], 0, postingsListSize);	
-//			/*
-//			documentTerms = new int[3][temporaryTerms.size()];
-//			int[] tmpDocumentTerms0 = documentTerms[0];
-//			int[] tmpDocumentTerms1 = documentTerms[1];
-//			int[] tmpDocumentTerms2 = documentTerms[2];
-//			tmpDocumentTerms0[0] = ((int[]) temporaryTerms.get(0))[0] - 1;
-//			tmpDocumentTerms1[0] = ((int[]) temporaryTerms.get(0))[1];
-//			tmpDocumentTerms2[0] = ((int[]) temporaryTerms.get(0))[2];
-//			if (documentTerms[0].length > 1) {
-//				for (int i = 1; i < documentTerms[0].length; i++) {
-//					int[] tmpMatrix = (int[]) temporaryTerms.get(i);
-//					tmpDocumentTerms0[i] = tmpMatrix[0] + documentTerms[0][i - 1];
-//					tmpDocumentTerms1[i] = tmpMatrix[1];
-//					tmpDocumentTerms2[i] = tmpMatrix[2];
-//				}
-//			}
-//			*/		
-//		} else { //no tag information to process
-//			
-//			/* NORMAL_LOAD_FACTOR provides a heuristical rough size need for the arraylist */
-//			TIntArrayList temporaryDocids = new TIntArrayList((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-//			TIntArrayList temporaryTFs = new TIntArrayList((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-//			//temporaryTerms = new ArrayList((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-//
-//			int previousDocid = -1;
-//			while (((file.getByteOffset() + startOffset) < endOffset)
-//					|| (((file.getByteOffset() + startOffset) == endOffset) && (file
-//							.getBitOffset() < endBitOffset))) {
-//				//read document ID
-//				temporaryDocids.add(previousDocid = file.readGamma() + previousDocid);
-//				//read document frequency
-//				temporaryTFs.add(file.readUnary());
-//				//int[] tmp = new int[2];
-//				//read document ID
-//				//tmp[0] = file.readGamma();
-//				//read document frequency
-//				//tmp[1] = file.readUnary();
-//				//temporaryTerms.add(tmp);
-//			}
-//
-//			final int postingsListSize = temporaryDocids.size(); /*temporaryTerms.size()*/
-//			documentTerms = new int[2][postingsListSize];
-//			temporaryDocids.toNativeArray(documentTerms[0], 0, postingsListSize);
-//			temporaryTFs.toNativeArray(documentTerms[1], 0, postingsListSize);
-//			//int last = -1;
-//			//int[] tmpDocumentTerms0 = documentTerms[0];
-//			//for(int i=0;i<postingsListSize;i++)
-//			//{
-//			//	last = tmpDocumentTerms0[i] = tmpDocumentTerms0[i] + last;
-//			//}
-//
-//			//int[] tmpDocumentTerms0 = documentTerms[0];
-//			//int[] tmpDocumentTerms1 = documentTerms[1];
-//			//tmpDocumentTerms0[0] = temporaryDocids.get(0);//((int[]) temporaryTerms.get(0))[0] - 1;
-//			//tmpDocumentTerms1[0] = temporaryTFs.get(0); //((int[]) temporaryTerms.get(0))[1];
-//			//if (documentTerms[0].length > 1) {
-//			//	for (int i = 1; i < documentTerms[0].length; i++) {
-//			//		last = tmpDocumentTerms0[i] = temporaryDocids.get(i) + last;
-//			//		tmpDocumentTerms1[i] = temporaryTFs.get(i);
-//					//int[] tmpMatrix = (int[]) temporaryTerms.get(i);
-//					//tmpDocumentTerms0[i] = tmpMatrix[0] + documentTerms[0][i - 1];
-//					//tmpDocumentTerms1[i] = tmpMatrix[1];
-//			//	}
-//			//}
-//		}
-//		//System.out.println((endOffset-startOffset)+" , "+temporaryTerms.size());
-//		return documentTerms;
-//	}
-	/* *
-	 * Returns a five dimensional array containing the document ids, 
-	 * the term frequencies, the field scores the block frequencies and 
-	 * the block ids for the given documents. The returned postings are
-	 * for the documents within a specified range of docids.
-	 * @return int[][] the five dimensional [5][] array containing 
-	 *		 the document ids, frequencies, field scores and block 
-	 *		 frequencies, while the last vector contains the 
-	 *		 block identifiers and it has a different length from 
-	 *		 the document identifiers.
-	 * @param termid the id of the term whose documents we are looking for.
-	 * @param startDocid The starting docid that will be returned.
-	 * @param endDocid The last possible docid that will be returned.
-	 */
-	/*public int[][] getDocuments(int termid, int startDocid, int endDocid) {
-		// Coding is done separately for with Fields and without Fields, to keep
-		  if's out of loops. 
-		boolean found = lexicon.findTerm(termid);
-		if (!found) 
-			return null;
-		
-		byte startBitOffset = lexicon.getStartBitOffset();
-		long startOffset = lexicon.getStartOffset();
-		byte endBitOffset = lexicon.getEndBitOffset();
-		long endOffset = lexicon.getEndOffset();
-		final int fieldCount = FieldScore.FIELDS_COUNT;
-		final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
-		
-		ArrayList<int[]> temporaryTerms = null; //instantiate when we know roughly how big it should be
-		int[][] documentTerms = null;
-		try{
-			final BitIn file = this.file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);
-			//boolean hasMore = false;
-			if (loadTagInformation) { //if there are tag information to process
-				// FIELD_LOAD_FACTOR provides a heuristical rough size need for the arraylist. 
-				// could probably do a better optimisation by considering the number of fields.
-				temporaryTerms = new ArrayList<int[]>((int)((endOffset-startOffset)*FIELD_LOAD_FACTOR));
-				while (((file.getByteOffset() + startOffset) < endOffset)
-						|| (((file.getByteOffset() + startOffset) == endOffset) && (file
-								.getBitOffset() < endBitOffset))) {
-					int[] tmp = new int[3];
-					//read documnent ID
-					tmp[0] = file.readGamma();
-					//read document frequency
-					tmp[1] = file.readUnary();
-					//read fields bitset (fieldCount bits long) 
-					tmp[2] = file.readBinary(fieldCount);
-					if (tmp[0]>=startDocid && tmp[0]<=endDocid)
-						temporaryTerms.add(tmp);
-				}
-				documentTerms = new int[3][temporaryTerms.size()];
-				int[] tmpDocumentTerms0 = documentTerms[0];
-				int[] tmpDocumentTerms1 = documentTerms[1];
-				int[] tmpDocumentTerms2 = documentTerms[2];
-				tmpDocumentTerms0[0] = ((int[]) temporaryTerms.get(0))[0] - 1;
-				tmpDocumentTerms1[0] = ((int[]) temporaryTerms.get(0))[1];
-				tmpDocumentTerms2[0] = ((int[]) temporaryTerms.get(0))[2];
-				if (documentTerms[0].length > 1) {
-					for (int i = 1; i < documentTerms[0].length; i++) {
-						int[] tmpMatrix = (int[]) temporaryTerms.get(i);
-						tmpDocumentTerms0[i] = tmpMatrix[0] + documentTerms[0][i - 1];
-						tmpDocumentTerms1[i] = tmpMatrix[1];
-						tmpDocumentTerms2[i] = tmpMatrix[2];
-					}
-				}			
-			} else { //no tag information to process
-				
-				//NORMAL_LOAD_FACTOR provides a heuristical rough size need for the arraylist 
-				temporaryTerms = new ArrayList<int[]>((int)((endOffset-startOffset)*NORMAL_LOAD_FACTOR));
-				while (((file.getByteOffset() + startOffset) < endOffset)
-						|| (((file.getByteOffset() + startOffset) == endOffset) && (file
-								.getBitOffset() < endBitOffset))) {
-					int[] tmp = new int[2];
-					//read document ID
-					tmp[0] = file.readGamma();
-					//read document frequency
-					tmp[1] = file.readUnary();
-					temporaryTerms.add(tmp);
-				}
-				documentTerms = new int[2][temporaryTerms.size()];
-				int[] tmpDocumentTerms0 = documentTerms[0];
-				int[] tmpDocumentTerms1 = documentTerms[1];
-				tmpDocumentTerms0[0] = ((int[]) temporaryTerms.get(0))[0] - 1;
-				tmpDocumentTerms1[0] = ((int[]) temporaryTerms.get(0))[1];
-				if (documentTerms[0].length > 1) {
-					for (int i = 1; i < documentTerms[0].length; i++) {
-						int[] tmpMatrix = (int[]) temporaryTerms.get(i);
-						tmpDocumentTerms0[i] = tmpMatrix[0] + documentTerms[0][i - 1];
-						tmpDocumentTerms1[i] = tmpMatrix[1];
-					}
-				}			
-			}
-		}
-		catch (IOException ioe) {
-			logger.error("Problem reading inverted index", ioe);
-			return null;
-		}
-		
-		return documentTerms;
-	}*/
-	
-	/**
-	 * Returns the information for a posting list in string format 
-	 */
-	public String getInfo(int term) {
-			StringBuilder info = new StringBuilder();					
-			int[][] documents = getDocuments(term);			
-			if (useFieldInformation) {
-				for (int j = 0; j < documents[0].length; j++) {
-					info.append("(");
-					info.append(documents[0][j]);
-					info.append(","); 
-					info.append(documents[1][j]);
-					info.append(",");
-					info.append(documents[2][j]);
-					info.append(")");
-				}							
-			} else {
-				for (int j = 0; j < documents[0].length; j++) {
-					info.append("(");
-					info.append(documents[0][j]);
-					info.append(",");
-					info.append(documents[1][j]);
-					info.append(")");
-				}				
-			}
-			return info.toString();
 		}
 	
 	
Index: src/uk/ac/gla/terrier/structures/UTFBlockLexicon.java
===================================================================
--- src/uk/ac/gla/terrier/structures/UTFBlockLexicon.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/UTFBlockLexicon.java	(working copy)
@@ -1,316 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is UTFBlockLexicon.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures;
-import java.io.*;
-import java.util.Arrays;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.StringTools;
-import uk.ac.gla.terrier.utility.io.RandomDataInput;
-import uk.ac.gla.terrier.utility.io.RandomDataOutput;
-/**
- * A lexicon class that saves the number of
- * different blocks a term appears in, using UTF encoding of Strings. It is used only during 
- * creating a utf block inverted index. After the utf block inverted
- * index has been created, the utf block lexicon is transformed into 
- * a utf lexicon.
- * @author Douglas Johnson, Vassilis Plachouras
- * @version $Revision: 1.16 $
- */
-public class UTFBlockLexicon extends BlockLexicon {
-	/** The logger used */
-	private static Logger logger = Logger.getRootLogger();
-	/**
-	 * The size in bytes of an entry in the lexicon file. An entry corresponds
-	 * to a string, an int (termCode), an int (docf), an int (tf), a long (the
-	 * offset of the end of the term's entry in bytes in the inverted file) and
-	 * a byte (the offset in bits of the last byte of the term's entry in the
-	 * inverted file.
-	 */
-	public static final int lexiconEntryLength = 
-		2+//two bytes for length written by writeUTF
-		ApplicationSetup.STRING_BYTE_LENGTH //the byte representation of the string, ie 3* MAX_TERM_LENGTH
-		+ 16 //the four integers
-		+ 8 //the long
-		+ 1; //the byte
-	/**
-	 * A default constructor.
-	 */
-	public UTFBlockLexicon() {
-		super();
-		
-		try {
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)lexiconEntryLength);
-			bufferInput.mark(3 * lexiconEntryLength);
-		} catch (IOException ioe) {
-			logger.fatal("Input/output exception while opening for reading the lexicon file. Stack trace follows",ioe);
-		}
-		inputStreamClass = UTFLexiconInputStream.class;
-	}
-
-    public UTFBlockLexicon(String path, String prefix)
-    {
-        this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-    }
-
-	/**
-	 * Constructs an instace of BlockLexicon and opens the corresponding file.
-	 * @param lexiconName the name of the lexicon file.
-	 */
-	public UTFBlockLexicon(String lexiconName) {
-		super(lexiconName);
-		try {
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)lexiconEntryLength);
-			bufferInput.mark(3 * lexiconEntryLength);
-		} catch (IOException ioe) {
-			logger.fatal("Input/output exception while opening for reading the " +
-							"lexicon file. Stack trace follows",ioe);
-		}
-		inputStreamClass = UTFLexiconInputStream.class;
-	}
-	
-
-	
-	/**
-	 * Finds the term given its term code.
-	 * 
-	 * @return true if the term is found, else return false
-	 * @param termId
-	 *            the term's id
-	 */
-	public boolean findTerm(int termId) {
-		try {
-			idToOffsetFile.seek((long)termId * 8L);
-			long lexiconOffset = idToOffsetFile.readLong();
-			if (lexiconOffset == 0) {
-				startOffset = 0;
-				startBitOffset = 0;
-				lexiconFile.seek(lexiconOffset);
-				
-				term = lexiconFile.readUTF();
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				this.termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			} else {
-				lexiconFile.seek(lexiconOffset - 9);
-				//goes to the lexicon offset minus the long offset and a byte
-				startOffset = lexiconFile.readLong();
-				startBitOffset = lexiconFile.readByte();
-				startBitOffset++;
-				if (startBitOffset == 8) {
-					startBitOffset = 0;
-					startOffset++;
-				}
-				term = lexiconFile.readUTF();
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				
-				this.termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.fatal("Input/Output exception while reading the idToOffset file. Stack trace follows.",ioe);
-		}
-		return false;
-	}
-	/**
-	 * Performs a binary search in the lexicon in order to locate the given
-	 * term. If the term is located, the properties termCharacters,
-	 * documentFrequency, termFrequency, startOffset, startBitOffset, endOffset
-	 * and endBitOffset contain the values related to the term.
-	 * 
-	 * @param _term the term to search for.
-	 * @return true if the term is found, and false otherwise.
-	 */
-	public boolean findTerm(String _term) {
-		Arrays.fill(buffer, (byte) 0);
-		Arrays.fill(bt, (byte) 0);
-		byte[] bt = _term.getBytes(); String tmpTerm = null;
-		//int termLength = ApplicationSetup.STRING_BYTE_LENGTH;			
-		//int _termId = 0;
-		long low = -1;
-		long high = numberOfLexiconEntries;
-		long i;
-		while (high-low>1) {
-			
-			i = (long)(high+low)/2;
-			try {
-				lexiconFile.seek((long)i * (long)UTFBlockLexicon.lexiconEntryLength);
-				tmpTerm = lexiconFile.readUTF();
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-			} catch (IOException ioe) {
-				logger.fatal(
-					"Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-			}
-			
-			int compareResult = 0;
-			compareResult = _term.compareTo(tmpTerm);
-			
-			if (compareResult < 1)
-				high = i;
-			else
-				low = i;			
-		}
-		if (high == numberOfLexiconEntries)
-			return false;
-		try {
-			lexiconFile.seek((long)high * (long)UTFBlockLexicon.lexiconEntryLength);
-			tmpTerm = lexiconFile.readUTF();
-			lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-		}	
-		
-		if (_term.compareTo(tmpTerm)==0) {
-			try {
-				findTerm(lexiconFile.readInt());
-				return true;
-			}catch(IOException ioe) {
-				logger.fatal("Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-			}
-		}
-		return false;
-	}
-
-	/**
-	 * Returns the block frequency for the given term
-	 * @return int The block frequency for the given term
-	 */
-	public int getBlockFrequency() {
-		return blockFrequency;
-	}
-	/**
-	 * Seeks the i-th entry of the lexicon.
-	 * @param i
-	 *            The index of the entry we are looking for.
-	 * @return true if the entry was found, false otherwise.
-	 */
-	public boolean seekEntry(int i) {
-		try {
-			if (i > numberOfLexiconEntries)
-				return false;
-			if (i == 0) {
-				lexiconFile.seek((long)i * (long)lexiconEntryLength);
-				startOffset = 0;
-				startBitOffset = 0;
-				term = lexiconFile.readUTF();
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			} else {
-				lexiconFile.seek((long)i * (long)lexiconEntryLength - (long)lexiconEntryLength
-						+ 2L + (long)ApplicationSetup.STRING_BYTE_LENGTH + 12L);
-				startOffset = lexiconFile.readLong();
-				startBitOffset = lexiconFile.readByte();
-				startBitOffset++;
-				if (startBitOffset == 8) {
-					startBitOffset = 0;
-					startOffset++;
-				}
-				
-				term = lexiconFile.readUTF();
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				blockFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.fatal("Input/Output exception while reading the idToOffset file. Stack trace follows.",ioe);
-		}
-		return false;
-	}
-	
-	/**
-	 * In an already stored entry in the lexicon file, the information about the
-	 * term frequency, the endOffset in bytes, and the endBitOffset in the last
-	 * byte, is updated. The term is specified by the index of the entry.
-	 * 
-	 * @return true if the information is updated properly, otherwise return
-	 *         false
-	 * @param i the i-th entry
-	 * @param frequency the term's Frequency
-	 * @param endOffset the offset of the ending byte in the inverted file
-	 * @param endBitOffset the offset in bits in the ending byte in the term's entry in
-	 *            inverted file
-	 * @deprecated Block Lexicons are used during indexing, but not during
-	 *             retrieval.
-	 */
-	public boolean updateEntry(int i, int frequency, long endOffset,
-			byte endBitOffset) {
-		if (! (lexiconFile instanceof RandomDataOutput))
-            return false;
-        RandomDataOutput _lexiconFile = (RandomDataOutput)lexiconFile;
-		try {
-			long lexiconOffset = (long)i * (long)lexiconEntryLength;
-			//we seek the offset where the frequency should be writen
-			_lexiconFile.seek(lexiconOffset
-					+ (long)ApplicationSetup.STRING_BYTE_LENGTH + 8L);
-			_lexiconFile.writeInt(frequency);
-			_lexiconFile.writeLong(endOffset);
-			_lexiconFile.writeByte(endBitOffset);
-		} catch (IOException ioe) {
-			logger.fatal("Input/Output exception while updating the lexicon file. Stack trace follows.",ioe);
-		}
-		return false;
-	}
-
-	/** returns the number of entries in the lexicon named by f  */
-    public static int numberOfEntries(File f)
-    {
-        return (int)(f.length()/ (long)lexiconEntryLength);
-    }
-
-	/** returns the number of entries in the lexicon named by filename */
-    public static int numberOfEntries(String filename)
-    {
-        return numberOfEntries(new File(filename));
-    }
-
-}
Index: src/uk/ac/gla/terrier/structures/indexing/LexiconBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/LexiconBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/LexiconBuilder.java	(working copy)
@@ -25,27 +25,24 @@
  *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
  */
 package uk.ac.gla.terrier.structures.indexing;
-import gnu.trove.TIntObjectHashMap;
-
-import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.ObjectOutputStream;
-import java.io.OutputStream;
 import java.util.Arrays;
-import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.Map;
 import java.util.PriorityQueue;
-import java.util.Set;
 
+import org.apache.hadoop.io.Text;
 import org.apache.log4j.Logger;
 
+import uk.ac.gla.terrier.structures.Closeable;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.Lexicon;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconInputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexicon;
+import uk.ac.gla.terrier.structures.FSOMapFileLexiconOutputStream;
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.Files;
 /**
  * Builds temporary lexicons during indexing a collection and
  * merges them when the indexing of a collection has finished.
@@ -54,15 +51,16 @@
  */
 public class LexiconBuilder
 {
-	/** class to be used as a lexiconinputstream. set by this and child classes */
-	protected Class lexiconInputStream = null;
+
 	/** class to be used as a lexiconoutpustream. set by this and child classes */
-	protected Class lexiconOutputStream = null;
+	protected Class<? extends LexiconOutputStream> lexiconOutputStream = null;
 
-	protected Class LexiconMapClass = null;
+	protected Class<? extends LexiconMap> LexiconMapClass = null;
+	
+	protected final String lexiconEntryFactoryValueClass;
 	
 	/** The logger used for this class */
-	protected static Logger logger = Logger.getRootLogger();
+	protected static Logger logger = Logger.getLogger(LexiconBuilder.class);
 	
 	/** How many documents have been processed so far.*/
 	protected int DocCount = 0;
@@ -70,12 +68,10 @@
 	/** How many terms are in the final lexicon */
 	protected int TermCount = 0;
 	
-	/** 
-	 * The number of documents for which a temporary lexicon
-	 * is created.
-	 */
-	protected static final int DocumentsPerLexicon = ApplicationSetup.BUNDLE_SIZE;
-	/** The linkedlist in which the temporary lexicon filenames are stored.
+	/** The number of documents for which a temporary lexicon is created. 
+	 * Corresponds to property <tt>bundle.size</tt>, default value 2000. */
+	protected static final int DocumentsPerLexicon = Integer.parseInt(ApplicationSetup.getProperty("bundle.size", "2000"));
+	/** The linkedlist in which the temporary lexicon structure names are stored.
 	  * These are merged into a single Lexicon by the merge() method. 
 	  * LinkedList is best List implementation for this, as all operations
 	  * are either append element, or remove first element - making LinkedList
@@ -86,7 +82,7 @@
 	protected LexiconMap TempLex;
 	
 	/** The directory to write temporary lexicons to */
-	protected String TemporaryLexiconDirectory = null;
+	//protected String TemporaryLexiconDirectory = null;
 	
 	/** The directory to write the final lexicons to */
 	protected String indexPath = null;
@@ -99,10 +95,10 @@
 	protected int TempLexCount = 0;
 	
 	/** How many temporary directories have been generated so far */
-	protected int TempLexDirCount = 0;
+	//protected int TempLexDirCount = 0;
 	
 	/** How many temporary lexicons per temporary directory. Set from the property <tt>lexicon.builder.templexperdir</tt>, default 100 */
-	protected static final int TempLexPerDir = Integer.parseInt(ApplicationSetup.getProperty("lexicon.builder.templexperdir", "100"));
+	//protected static final int TempLexPerDir = Integer.parseInt(ApplicationSetup.getProperty("lexicon.builder.templexperdir", "100"));
 
 	/** Should we only merge lexicons in pairs (Terrier 1.0.x scheme)? Set by property <tt>lexicon.builder.merge.2lex.attime</tt> */
 	protected static final boolean MERGE2LEXATTIME = Boolean.parseBoolean(ApplicationSetup.getProperty("lexicon.builder.merge.2lex.attime", "false"));
@@ -110,40 +106,72 @@
 	/** Number of lexicons to merge at once. Set by property <tt>lexicon.builder.merge.lex.max</tt>, defaults to 16 */
 	protected static final int MAXLEXMERGE = Integer.parseInt(ApplicationSetup.getProperty("lexicon.builder.merge.lex.max", "16"));
 
-	/**
-	 * A default constructor of the class. The lexicon is built in the 
-	 * default path and file: ApplicationSetup.TERRIER_INDEX_PATH and 
-	 * ApplicationSetup.TERRIER_INDEX_PREFIX respectively.
-	 * @deprecated
-	 */
-	public LexiconBuilder()
+	public interface CollectionStaticticsCounter<V> extends Closeable
 	{
-		this(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
+		public void count(V value);
 	}
 	
-	public LexiconBuilder(Index i) {
+	static class BasicLexiconCollectionStaticticsCounter // tallies terms, tokens and pointers; close() stores the totals in the index
+		implements CollectionStaticticsCounter<LexiconEntry>
+	{
+		long numberOfTokens = 0; // running sum of getFrequency() over the counted entries
+		int numberOfTerms = 0; // number of entries passed to count()
+		long numberOfPointers = 0; // running sum of getDocumentFrequency() over the counted entries
+		final Index index; // index that receives the totals in close(); may be null
+		public BasicLexiconCollectionStaticticsCounter(Index _index) // _index may be null, in which case close() does nothing
+		{
+			index = _index;
+		}
+		/** Adds the frequency and document frequency of one lexicon entry to the running totals. */
+		public void count(LexiconEntry value)
+	{
+			numberOfTokens += value.getFrequency();
+			numberOfPointers += value.getDocumentFrequency();
+			numberOfTerms++;
+	}
+		/** Stores the totals as index properties num.Terms, num.Tokens and num.Pointers; does nothing when index is null. */
+		public void close()
+		{
+			if (index != null)
+			{
+				index.setIndexProperty("num.Terms", ""+numberOfTerms);
+				index.setIndexProperty("num.Tokens", ""+numberOfTokens);
+				index.setIndexProperty("num.Pointers", ""+numberOfPointers);
+			}
+		}
+	}
+	
+	protected String defaultStructureName;
+	protected FixedSizeWriteableFactory<LexiconEntry> valueFactory;
+	
+	/** Creates a builder for the lexicon structure _structureName of index i, using LexiconMap and BasicLexiconEntry. */
+	public LexiconBuilder(Index i, String _structureName) {
+		this(i, _structureName, 
+				LexiconMap.class, "uk.ac.gla.terrier.structures.BasicLexiconEntry");
+	}
+	
+	@SuppressWarnings("unchecked")
+	protected LexiconBuilder(Index i, String _structureName, 
+			Class <? extends LexiconMap> _LexiconMapClass,
+			String _lexiconEntryClass)
+	{
 		this.index = i;
 		this.indexPath = index.getPath();
 		this.indexPrefix = index.getPrefix();
-		TemporaryLexiconDirectory = indexPath + ApplicationSetup.FILE_SEPARATOR + indexPrefix + "_";
-		LexiconMapClass = LexiconMap.class;	
+		this.defaultStructureName = _structureName;
+		//TemporaryLexiconDirectory = indexPath + ApplicationSetup.FILE_SEPARATOR + indexPrefix + "_";
+		LexiconMapClass = _LexiconMapClass;	
+		lexiconEntryFactoryValueClass = _lexiconEntryClass;
 		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-		lexiconInputStream = LexiconInputStream.class;
-		lexiconOutputStream = LexiconOutputStream.class;
-	}
 	
-	/** 
-	 * Creates an instance of the class, given the path
-	 * to save the temporary lexicons.
-	 * @param pathname String the path to save the temporary lexicons.
-	 */
-	public LexiconBuilder(String pathname, String prefix) {
-		indexPath = pathname;
-		indexPrefix = prefix;
-		TemporaryLexiconDirectory = pathname + ApplicationSetup.FILE_SEPARATOR + prefix + "_";
-		LexiconMapClass = LexiconMap.class;	
-		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-		lexiconInputStream = LexiconInputStream.class;
+		this.index.addIndexStructure(
+				defaultStructureName+"-keyfactory", 
+				"uk.ac.gla.terrier.structures.seralization.FixedSizeTextFactory",
+				"java.lang.String",
+				"${max.term.length}"
+				);
+		this.index.addIndexStructure(defaultStructureName+"-valuefactory", lexiconEntryFactoryValueClass+"$Factory", "", "");
+		valueFactory = (FixedSizeWriteableFactory<LexiconEntry>)this.index.getIndexStructure(defaultStructureName+"-valuefactory");
 		lexiconOutputStream = LexiconOutputStream.class;
 	}
 
@@ -155,9 +183,11 @@
 
 	/** If the application code generated lexicons itself, use this method to add them to the merge list 
 	  * Otherwise dont touch this method.
-	  * @param filename Fully path to a lexicon to merge */
-	public void addTemporaryLexicon(String filename) {
-		filename = ApplicationSetup.makeAbsolute(filename, TemporaryLexiconDirectory);
+	  * @param structureName name of the temporary lexicon structure to merge
+	  * @deprecated */
+	public void addTemporaryLexicon(String structureName) {
+		tempLexFiles.addLast(structureName);
+		//filename = ApplicationSetup.makeAbsolute(filename, TemporaryLexiconDirectory);
 	}
 
 	/** Writes the current contents of TempLex temporary lexicon binary tree down to
@@ -166,19 +196,21 @@
 	protected void writeTemporaryLexicon()
 	{
 		try{
-			TempLexDirCount = TempLexCount / TempLexPerDir;
-			if (! Files.exists(TemporaryLexiconDirectory + TempLexDirCount)) {
-				String tmpDir = TemporaryLexiconDirectory + TempLexDirCount;
-				Files.mkdir(tmpDir);
-				Files.deleteOnExit(tmpDir);//it's fine to mark the temporary *directory* for deletion
-			}
-			String tmpLexName = TemporaryLexiconDirectory + TempLexDirCount + ApplicationSetup.FILE_SEPARATOR + 
-				(TempLexCount) + ApplicationSetup.LEXICONSUFFIX;
-			LexiconOutputStream los = getLexOutputStream(tmpLexName);
+			//TempLexDirCount = TempLexCount / TempLexPerDir;
+			//if (! Files.exists(TemporaryLexiconDirectory + TempLexDirCount)) {
+			//	String tmpDir = TemporaryLexiconDirectory + TempLexDirCount;
+			//	Files.mkdir(tmpDir);
+			//	Files.deleteOnExit(tmpDir);//it's fine to mark the temporary *directory* for deletion
+			//}
+			//String tmpLexName = TemporaryLexiconDirectory + TempLexDirCount + ApplicationSetup.FILE_SEPARATOR + TempLexCount;
+			//LexiconOutputStream<String> los = getLexOutputStream(TempLexDirCount+""+TempLexCount);
+			final String tmpLexName = this.defaultStructureName+"-tmp"+ TempLexCount;
+			LexiconOutputStream<String> los = getLexOutputStream(tmpLexName);
 			TempLex.storeToStream(los);
 			los.close();
 			/* An alternative but deprecated method to store the temporary lexicons is: 
 			 * TempLex.storeToFile(tmpLexName); */
+			//tempLexFiles.addLast(TempLexDirCount+""+TempLexCount);
 			tempLexFiles.addLast(tmpLexName);
 		}catch(IOException ioe){
 			logger.error("Indexing failed to write a lexicon to disk : ", ioe);
@@ -224,15 +256,7 @@
 	 * inverted index.
 	 */
 	public void finishedInvertedIndexBuild() {
-		if (Boolean.parseBoolean(ApplicationSetup.getProperty("lexicon.use.hash","true"))) {
-			String lexiconFilename = indexPath + ApplicationSetup.FILE_SEPARATOR + indexPrefix + ApplicationSetup.LEXICONSUFFIX;
-			LexiconInputStream lexStream = getLexInputStream(lexiconFilename);
-			this.createLexiconHash(lexStream);
-		}
-		if (index != null)
-		{
-			index.addIndexStructure("lexicon", "uk.ac.gla.terrier.structure.Lexicon");
-		}
+		LexiconBuilder.optimise(index, defaultStructureName);
 	}
 	
 	/** 
@@ -251,40 +275,23 @@
 		//merges the temporary lexicons
 		if (tempLexFiles.size() > 0)
 		{
-			Set<String> tempDirectories = new HashSet<String>();
-			for(String tmpLex : tempLexFiles)
-			{
-				tempDirectories.add(Files.getParent(tmpLex));
-			}
+			//Set<String> tempDirectories = new HashSet<String>();
+			//for(String tmpLex : tempLexFiles)
+			//{
+			//	tempDirectories.add(Files.getParent(tmpLex));
+			//}
 			try{
 				merge(tempLexFiles);
 				
-			
-				//creates the offsets file
-				final String lexiconFilename = 
-							indexPath + ApplicationSetup.FILE_SEPARATOR + 
-							indexPrefix + ApplicationSetup.LEXICONSUFFIX;
-				LexiconInputStream lis = getLexInputStream(lexiconFilename);
-				createLexiconIndex(
-						lis,
-						lis.numberOfEntries(),
-						Lexicon.lexiconEntryLength
-						); 
-				TermCount = lis.numberOfEntries();
-				if (index != null)
-				{
-					index.addIndexStructure("lexicon", "uk.ac.gla.terrier.structures.Lexicon");
-					index.addIndexStructureInputStream("lexicon", "uk.ac.gla.terrier.structures.LexiconInputStream");
-					index.setIndexProperty("num.Terms", ""+lis.numberOfEntries());
-					index.setIndexProperty("num.Pointers", ""+lis.getNumberOfPointersRead());
-				}
+				//creates the offsets and hash file
+				LexiconBuilder.optimise(index, defaultStructureName);
 			} catch(IOException ioe){
 				logger.error("Indexing failed to merge temporary lexicons to disk : ", ioe);
 			}
-			for (String tmpDir : tempDirectories)
-			{
-				Files.delete(tmpDir);
-			}	
+			//for (String tmpDir : tempDirectories)
+			//{
+			//	Files.delete(tmpDir);
+			//}
 		}	
 		else
 			logger.warn("No temporary lexicons to merge, skipping");
@@ -297,6 +304,7 @@
 	 * @throws IOException an input/output exception is throws 
 	 *		 if a problem is encountered.
 	 */
+	@SuppressWarnings("unchecked")
 	public void merge(LinkedList<String> filesToMerge) throws IOException {
 		//now the merging of the files in the filesToMerge vector 
 		//must take place. 
@@ -318,7 +326,8 @@
 		}
 		if (StartFileCount == 1)
 		{
-			Files.rename(filesToMerge.removeFirst(), indexPath + ApplicationSetup.FILE_SEPARATOR +indexPrefix + ApplicationSetup.LEXICONSUFFIX);
+			FSOMapFileLexicon.renameMapFileLexicon(filesToMerge.removeFirst(), index.getPath(), index.getPrefix(), 
+					defaultStructureName, index.getPath(), index.getPrefix());
 		}
 		else if (MERGE2LEXATTIME)
 		{
@@ -326,7 +335,7 @@
 			if (logger.isDebugEnabled())
 				logger.debug("begin merging "+ StartFileCount +" temporary lexicons, in pairs...");
 			long startTime = System.currentTimeMillis();
-			int progressiveNumber = ApplicationSetup.MERGE_TEMP_NUMBER;
+			int progressiveNumber = 0;
 			String newMergedFile = null;
 			while (filesToMerge.size() > 1) {
 				String fileToMerge1 = (String) filesToMerge.removeFirst();
@@ -334,23 +343,17 @@
 				
 				//give the proper name to the final merged lexicon
 				if (filesToMerge.size() == 0) 
-					newMergedFile = indexPath + ApplicationSetup.FILE_SEPARATOR + 
-						indexPrefix + ApplicationSetup.LEXICONSUFFIX;
+					newMergedFile = defaultStructureName;
 				else 
-					newMergedFile =
-						Files.getParent(fileToMerge1) 
-							+ ApplicationSetup.FILE_SEPARATOR
-							+ ApplicationSetup.MERGE_PREFIX
-							+ String.valueOf(progressiveNumber++)
-							+ ApplicationSetup.LEXICONSUFFIX;
+					newMergedFile = defaultStructureName + "-mergetmp"+ String.valueOf(progressiveNumber++);
 	
 				//The opening of the files needs to break into more steps, so that
 				//all the open streams are closed after the completion of the 
 				//operation, and eventually the intermediate files are deleted.
 
-				LexiconInputStream lis1 = getLexInputStream(fileToMerge1);
-				LexiconInputStream lis2 = getLexInputStream(fileToMerge2);
-				LexiconOutputStream los = getLexOutputStream(newMergedFile);
+				Iterator<Map.Entry<String,LexiconEntry>> lis1 = getLexInputStream(fileToMerge1);
+				Iterator<Map.Entry<String,LexiconEntry>> lis2 = getLexInputStream(fileToMerge2);
+				LexiconOutputStream<String> los = getLexOutputStream(newMergedFile);
 	
 				if (logger.isDebugEnabled())
 					logger.debug(
@@ -364,8 +367,8 @@
 				mergeTwoLexicons(lis1, lis2, los);
 			
 				//delete the two files just merged
-				Files.delete(fileToMerge1);
-				Files.delete(fileToMerge2);
+				FSOMapFileLexicon.deleteMapFileLexicon(fileToMerge1, indexPath, indexPrefix);
+				FSOMapFileLexicon.deleteMapFileLexicon(fileToMerge2, indexPath, indexPrefix);
 				filesToMerge.addLast(newMergedFile);
 			}
 			long endTime = System.currentTimeMillis();
@@ -377,7 +380,7 @@
 			if (logger.isDebugEnabled())
 				logger.debug("begin merging "+ StartFileCount +" files in batches of upto "+mergeNMaxLexicon+"...");
 			long startTime = System.currentTimeMillis();
-			int progressiveNumber = ApplicationSetup.MERGE_TEMP_NUMBER;
+			int progressiveNumber = 0;
 	
 
 			while (filesToMerge.size() > 1)
@@ -386,7 +389,7 @@
 				if (logger.isDebugEnabled())
 					 logger.debug("merging "+ numLexicons + " temporary lexicons");
 				final String inputLexiconFileNames[] = new String[numLexicons];
-				final LexiconInputStream[] lis = new LexiconInputStream[numLexicons];
+				final Iterator<Map.Entry<String,LexiconEntry>>[] lis = (Iterator<Map.Entry<String,LexiconEntry>>[])new Iterator[numLexicons];
 
 				for(int i=0;i<numLexicons;i++)
 				{
@@ -397,21 +400,15 @@
 				String newMergedFile = null;
 				//give the proper name to the final merged lexicon
 				if (filesToMerge.size() == 0)
-					newMergedFile = indexPath + ApplicationSetup.FILE_SEPARATOR +
-						indexPrefix + ApplicationSetup.LEXICONSUFFIX;
+					newMergedFile = defaultStructureName;
 				else
-					newMergedFile =
-						Files.getParent(inputLexiconFileNames[0])
-							+ ApplicationSetup.FILE_SEPARATOR
-							+ ApplicationSetup.MERGE_PREFIX
-							+ String.valueOf(progressiveNumber++)
-							+ ApplicationSetup.LEXICONSUFFIX;
+					newMergedFile = defaultStructureName + "-mergetmp"+ String.valueOf(progressiveNumber++);
 
-				final LexiconOutputStream  los = getLexOutputStream(newMergedFile);
+				final LexiconOutputStream<String> los = getLexOutputStream(newMergedFile);
 				mergeNLexicons(lis, los);
-				for(int i=0;i<numLexicons;i++)
+				for(String inputLexiconFileName : inputLexiconFileNames)
 				{
-					Files.delete(inputLexiconFileNames[i]);
+					FSOMapFileLexicon.deleteMapFileLexicon(inputLexiconFileName, index.getPath(), index.getPrefix());
 				}
 				filesToMerge.addLast(newMergedFile);
 			}
@@ -424,7 +421,8 @@
 				logger.debug("begin merging "+ StartFileCount +" temporary lexicons at once...");
 			long startTime = System.currentTimeMillis();
 			final String inputLexiconFileNames[] = new String[StartFileCount];
-			final LexiconInputStream[] lis = new LexiconInputStream[StartFileCount];
+			final Iterator<Map.Entry<String,LexiconEntry>>[] lis = 
+				(Iterator<Map.Entry<String,LexiconEntry>>[]) new Iterator[StartFileCount];
 			
 			for(int i=0;i<StartFileCount;i++)
 			{
@@ -432,34 +430,52 @@
 				lis[i] = getLexInputStream(inputLexiconFileNames[i]);
 				//logger.debug(i+" "+inputLexiconFileNames[i]);
 			}
-			final LexiconOutputStream los = getLexOutputStream( indexPath + ApplicationSetup.FILE_SEPARATOR +
-				indexPrefix + ApplicationSetup.LEXICONSUFFIX);
+			final LexiconOutputStream<String> los = getLexOutputStream(defaultStructureName);
 			mergeNLexicons(lis, los);
 			for(int i=0;i<StartFileCount;i++)
 			{
-					Files.delete(inputLexiconFileNames[i]);
+				FSOMapFileLexicon.deleteMapFileLexicon(inputLexiconFileNames[i], index.getPath(), index.getPrefix());
 			}
 			long endTime = System.currentTimeMillis();
 			if (logger.isDebugEnabled())
 				logger.debug("end of merging...("+((endTime-startTime)/1000.0D)+" seconds)");
 		}
+		FSOMapFileLexiconOutputStream.addLexiconToIndex(this.index, defaultStructureName, lexiconEntryFactoryValueClass+"$Factory");
+	}
+	/** Creates a new LexiconEntry via valueFactory and sets its term id to termid. */
+	protected LexiconEntry newLexiconEntry(int termid)
+	{
+		LexiconEntry rtr = valueFactory.newInstance();
+		rtr.setTermId(termid);
+		return rtr;
 	}
 	
-	protected void mergeNLexicons(LexiconInputStream[] lis, LexiconOutputStream los) throws IOException
+	@SuppressWarnings("unchecked")
+	protected void mergeNLexicons(Iterator<Map.Entry<String,LexiconEntry>>[] lis, LexiconOutputStream<String> los) throws IOException
 	{
 		final int numLexicons = lis.length;
-		long totalTokens = 0;
-		long totalPointers = 0;
-		int hasMore[] = new int[numLexicons];
-		Arrays.fill(hasMore, -1);
+		boolean hasMore[] = new boolean[numLexicons];
+		Map.Entry<String,LexiconEntry>[] currentEntries = new Map.Entry[numLexicons];
+		
+		Arrays.fill(hasMore, false);
 		PriorityQueue<String> terms = new PriorityQueue<String>(numLexicons);
 		for(int i=0;i<numLexicons;i++)
 		{
-			hasMore[i] = lis[i].readNextEntry();
-			terms.add(lis[i].getTerm());	
+			hasMore[i] = lis[i].hasNext();
+			if (hasMore[i])
+			{
+				currentEntries[i] = lis[i].next();
+				terms.add(currentEntries[i].getKey());
+			}
+			else
+			{
+				currentEntries[i] = null;
+			}
+				
 		}
-		int Tf = 0; int Nt = 0; String targetTerm= null;
+		String targetTerm= null;
 		int targetTermId  = -1;
+		LexiconEntry nextEntryToWrite = null;
 		while(terms.size() > 0)
 		{
 			//what term are we working on
@@ -470,24 +486,28 @@
 			{
 				//does this lexicon contain the term
 				//logger.debug("Checking lexicon "+i+" for "+targetTerm+"="+lis[i].getTerm());
-				if(hasMore[i] != -1 && lis[i].getTerm().equals(targetTerm))
+				if(hasMore[i] && currentEntries[i].getKey().equals(targetTerm))
 				{
 					if (targetTermId == -1)
 					{	//obtain the termid for this term from the first lexicon that has the term
-						targetTermId = lis[i].getTermId();
+						nextEntryToWrite = newLexiconEntry(targetTermId = currentEntries[i].getValue().getTermId());
 					}
-					else if (targetTermId != lis[i].getTermId())
+					else if (targetTermId != currentEntries[i].getValue().getTermId())
 					{	//check the termids match for this term
-						logger.error("Term "+targetTerm+" had two termids ("+targetTermId+","+lis[i].getTermId()+")");
+						logger.error("Term "+targetTerm+" had two termids ("+targetTermId+","+currentEntries[i].getValue().getTermId()+")");
 					}
 					//logger.debug("Term "+targetTerm + " found in "+i + "termid="+ lis[i].getTermId());
-					Tf += lis[i].getTF();
-					Nt += lis[i].getNt();
-					hasMore[i] = lis[i].readNextEntry();
-					if (hasMore[i] != -1)
+					nextEntryToWrite.add(currentEntries[i].getValue());
+					hasMore[i] = lis[i].hasNext();
+					
+					if (hasMore[i])
+					{
+						currentEntries[i] = lis[i].next();
+						terms.add(currentEntries[i].getKey());
+					}
+					else
 					{
-						terms.add(lis[i].getTerm());
-						//break;
+						currentEntries[i] = null;
 					}
 					break;
 				}
@@ -499,19 +519,18 @@
 					logger.error("Term "+ targetTerm + " not found in any lexicons");
 				}
 				//end of this term, so we can write the lexicon entry
-				totalTokens += Tf;
-				totalPointers += Nt;
-				los.writeNextEntry(targetTerm, targetTermId, Nt, Tf, 0, (byte)0);
-				Tf = Nt = 0; targetTermId = -1; targetTerm = null;
+				los.writeNextEntry(targetTerm, nextEntryToWrite);
+				nextEntryToWrite = null; targetTermId = -1; targetTerm = null;
 			}
 		}
-		totalTokens += Tf;
-		totalPointers += Nt;
 		if (targetTermId != -1)
-			los.writeNextEntry(targetTerm, targetTermId, Nt, Tf, 0, (byte)0);
+			los.writeNextEntry(targetTerm, nextEntryToWrite);
 		los.close();
 		for(int i=0;i<numLexicons;i++)
-			lis[i].close();
+		{
+			if (lis[i] instanceof Closeable)
+				((Closeable)lis[i]).close();
+		}
 	}
 		
 
@@ -521,9 +540,9 @@
 	  * @param los Lexion to be merged to
 	  */
 	protected void mergeTwoLexicons(
-			LexiconInputStream lis1,
-			LexiconInputStream lis2,
-			LexiconOutputStream los) throws IOException
+			Iterator<Map.Entry<String,LexiconEntry>> lis1,
+			Iterator<Map.Entry<String,LexiconEntry>> lis2,
+			LexiconOutputStream<String> los) throws IOException
 	{
 
 		//We always take the first two entries of
@@ -538,21 +557,22 @@
 		int termID1 = 0;
 		int termID2 = 0;
 
-		long totalTokens = 0;
-		long totalPointers = 0;
 	
-
-		hasMore1 = (lis1.readNextEntry()!=-1);
-		hasMore2 = (lis2.readNextEntry()!=-1);
+		hasMore1 = lis1.hasNext();
+		hasMore2 = lis2.hasNext();
 		String sTerm1 = null;
 		String sTerm2 = null;
+		Map.Entry<String, LexiconEntry> lee1 = null;
+		Map.Entry<String, LexiconEntry> lee2 = null;
 		if (hasMore1) {
-			termID1 = lis1.getTermId();
-			sTerm1 = lis1.getTerm();
+			lee1 = lis1.next();
+			termID1 = lee1.getValue().getTermId();
+			sTerm1 = lee1.getKey();
 		}
 		if (hasMore2) {
-			termID2 = lis2.getTermId();
-			sTerm2 = lis2.getTerm();
+			lee2 = lis2.next();
+			termID2 = lee2.getValue().getTermId();
+			sTerm2 = lee2.getKey();
 		}
 		while (hasMore1 && hasMore2) {
 			int compareString = 0;
@@ -567,341 +587,137 @@
 			}
 			
 			if (compareString <0) {
-				totalTokens += lis1.getTF();
-				totalPointers += lis1.getNt();
-				los.writeNextEntry(sTerm1, termID1, lis1.getNt(), lis1.getTF(), lis1.getEndOffset(), lis1.getEndBitOffset());
-				hasMore1 = (lis1.readNextEntry()!=-1);
+				los.writeNextEntry(sTerm1, lee1.getValue());
+				hasMore1 = lis1.hasNext();
 				if (hasMore1) {
-					termID1 = lis1.getTermId();
-					sTerm1 = lis1.getTerm();
+					lee1 = lis1.next();
+					termID1 = lee1.getValue().getTermId();
+					sTerm1 = lee1.getKey();
 				}
 			} else if (compareString >0) {
-				totalTokens += lis2.getTF();
-				totalPointers += lis2.getNt();
-				los.writeNextEntry(sTerm2, termID2, lis2.getNt(), lis2.getTF(), lis2.getEndOffset(), lis2.getEndBitOffset());
-				hasMore2 = (lis2.readNextEntry()!=-1);
+				los.writeNextEntry(sTerm2, lee2.getValue());
+				hasMore2 = lis2.hasNext();
 				if (hasMore2) {
-					termID2 = lis2.getTermId();
-					sTerm2 = lis2.getTerm();
+					lee2 = lis2.next();
+					termID2 = lee2.getValue().getTermId();
+					sTerm2 = lee2.getKey();
 				}
 			} else /*if (compareString == 0)*/ {
-				totalTokens += lis1.getTF() + lis2.getTF();
-				totalPointers += lis1.getNt() + lis2.getNt();
+				lee1.getValue().add(lee2.getValue());
 				los.writeNextEntry(
 					sTerm1, 
-					termID1, 
-					lis1.getNt() + lis2.getNt(),
-					lis1.getTF() + lis2.getTF(),  							 
-					0, //inverted index not built yet, so no offsets
-					(byte)0 //inverted index not built yet, so no offsets
+					lee1.getValue()
 				);
-		
-				hasMore1 = (lis1.readNextEntry()!=-1);
-				hasMore2 = (lis2.readNextEntry()!=-1);
+				hasMore1 = lis1.hasNext();
+				hasMore2 = lis2.hasNext();
 				if (hasMore1) {
-					termID1 = lis1.getTermId();
-					sTerm1 = lis1.getTerm();
+					lee1 = lis1.next();
+					termID1 = lee1.getValue().getTermId();
+					sTerm1 = lee1.getKey();
 				}
 				if (hasMore2) {
-					termID2 = lis2.getTermId();
-					sTerm2 = lis2.getTerm();
+					lee2 = lis2.next();
+					termID2 = lee2.getValue().getTermId();
+					sTerm2 = lee2.getKey();
 				}
 			}
 		}
 		if (hasMore1) {
-			lis2.close();
+			if (lis2 instanceof Closeable) {
+				((Closeable)lis2).close();
+			}
 
 			while (hasMore1) {
-				totalTokens += lis1.getTF();
-				totalPointers += lis1.getNt();
-				los.writeNextEntry(sTerm1, termID1, lis1.getNt(), lis1.getTF(), lis1.getEndOffset(), lis1.getEndBitOffset());
-				hasMore1 = (lis1.readNextEntry()!=-1);
+				los.writeNextEntry(sTerm1, lee1.getValue());
+				hasMore1 = lis1.hasNext();
 				if (hasMore1) {
-					termID1 = lis1.getTermId();
-					sTerm1 = lis1.getTerm();
+					lee1 = lis1.next();
+					termID1 = lee1.getValue().getTermId();
+					sTerm1 = lee1.getKey();
 				}
 			}
 
 			//close input file 1 stream
-			lis1.close();
+			if (lis1 instanceof Closeable) { // lis2 was already closed before the loop
+				((Closeable)lis1).close();
+			}
 
 		} else if (hasMore2) {
-			lis1.close();
+			if (lis1 instanceof Closeable) {
+				((Closeable)lis1).close();
+			}
 
 			while (hasMore2) {
-				totalTokens += lis2.getTF();
-				totalPointers += lis2.getNt();	
-				los.writeNextEntry(sTerm2, termID2, lis2.getNt(), lis2.getTF(), lis2.getEndOffset(), lis2.getEndBitOffset());
-				hasMore2 = (lis2.readNextEntry()!=-1);
+				los.writeNextEntry(sTerm2, lee2.getValue());
+				hasMore2 = lis2.hasNext();
 				if (hasMore2) {
-					termID2 = lis2.getTermId();
-					sTerm2 = lis2.getTerm();
+					lee2 = lis2.next();
+					termID2 = lee2.getValue().getTermId();
+					sTerm2 = lee2.getKey();
 				}
 			}
 			//close input file 2 stream
-			lis2.close();
+			if (lis2 instanceof Closeable) {
+				((Closeable)lis2).close();
+			}
 		}
 		//close output file streams
 		los.close();
 	}
 	
-	/**
-	 * Creates the lexicon index file that contains a mapping from the 
-	 * given term id to the offset in the lexicon, in order to 
-	 * be able to retrieve the term information according to the 
-	 * term identifier. This is necessary, because the terms in the lexicon 
-	 * file are saved in lexicographical order, and we also want to have 
-	 * fast access based on their term identifier.
-	 * @param lexicon The input stream of the lexicon that we are creating the lexid file for
-	 * @param lexiconEntries The number of entries in this lexicon
-	 * @param lexiconEntrySize The size of one entry in this lexicon
-	 * @exception java.io.IOException Throws an Input/Output exception if 
-	 *			there is an input/output error. 
-	 */
-	public void createLexiconIndex(final LexiconInputStream lexicon, 
-			final int lexiconEntries, 
-			final int lexiconEntrySize) throws IOException {
-		createLexiconIndex(lexicon, lexiconEntries,lexiconEntrySize, indexPath, indexPrefix);
-	}
-	/**
-	 * Creates the lexicon index file that contains a mapping from the
-	 * given term id to the offset in the lexicon, in order to
-	 * be able to retrieve the term information according to the
-	 * term identifier. This is necessary, because the terms in the lexicon
-	 * file are saved in lexicographical order, and we also want to have
-	 * fast access based on their term identifier.
-	 * @param lexicon The input stream of the lexicon that we are creating the lexid file for
-	 * @param lexiconEntries The number of entries in this lexicon
-	 * @param lexiconEntrySize The size of one entry in this lexicon
-	 * @param path The path to the index containing the lexicon
-	 * @param prefix The prefix of the index containing the lexicon
-	 * @exception java.io.IOException Throws an Input/Output exception if
-	 * 	there is an input/output error.
-	 */
-
-	public static void createLexiconIndex(final LexiconInputStream lexicon,
-			final int lexiconEntries, final int lexiconEntrySize,
-			final String path, final String prefix) throws IOException
-	{
-		//save the offsets to a file with the same name as
-		//the lexicon and extension .lexid
-		String lexid = path +
-					ApplicationSetup.FILE_SEPARATOR +
-					prefix +
-					ApplicationSetup.LEXICON_INDEX_SUFFIX;
-		DataOutputStream dosLexid = new DataOutputStream(Files.writeFileStream(lexid));
-		createLexiconIndex(lexicon, lexiconEntries, lexiconEntrySize, dosLexid);
-	}
-	
-	public static void createLexiconIndex(final LexiconInputStream lexicon,
-				final int lexiconEntries, final int lexiconEntrySize,
-				final DataOutputStream dosLexid) throws IOException
-		{
-
-		/*
-		 * This method reads from the lexicon the term ids and stores the
-		 * corresponding offsets in an array. Then this array is written out 
-		 * in order according to the term id.
-		 */
-		long totalPointers = 0;
-		long totalTokens = 0;
-
-
-		//the i-th element of offsets contains the offset in the
-		//lexicon file of the term with term identifier equal to i.
-		long[] offsets = new long[lexiconEntries];
-		int termid = -1;
-		int i=0;
-		try{
-			while (lexicon.readNextEntry()!=-1) {
-		 		termid = lexicon.getTermId();
-				totalPointers += lexicon.getNt();
-				totalTokens += lexicon.getTF();
-				//Debugging: if an exception occurs here, then this infers that the number of entries in the lexicon
-				//has been calculated incorrectly, or that termId > lexiconEntries. termid > lexiconEntries could be
-				//a sign that the lexicon is being decoded incorrecty - eg you're using LexiconInputStream instead of
-				//UTFLexiconInputStream
-				offsets[termid] = (long)i * (long)lexiconEntrySize;
-				i++;
-			}
-		} catch (ArrayIndexOutOfBoundsException aioob) {
-			logger.error("Termid overflow while creating lexid file: NumEntries="+lexiconEntries+ " entrySize="
-				+lexiconEntrySize+ " termid="+termid, aioob);
-		}
-		lexicon.close();
-		//write out the offsets
-		for (i = 0; i < lexiconEntries; i++) {
-			dosLexid.writeLong(offsets[i]);
-		}
-		dosLexid.close();
-	}
 	
 	/** Creates a lexicon index for the specified index
 	  * @param index Index to make the lexicon index for
+	  * @deprecated use optimise instead
 	  */	
 	public static void createLexiconIndex(Index index) throws IOException
 	{
-		final LexiconInputStream lis = (LexiconInputStream)index.getIndexStructureInputStream("lexicon");
-		LexiconBuilder.createLexiconIndex(
-			lis,
-			index.getCollectionStatistics().getNumberOfUniqueTerms(), 
-			lis.getEntrySize(), 
-			index.getPath(),
-			index.getPrefix());
+		optimise(index, "lexicon");
 	}
 
-	/** Create a lexicon hash for the current index
-	  * @param lexStream lexiconinputstream to process
-	  */
-	public void createLexiconHash(final LexiconInputStream lexStream) {
-		LexiconBuilder.createLexiconHash(lexStream, indexPath, indexPrefix);
-	}
 
 	
 	/** Creates a lexicon hash for the specified index
-	 * @param index Index to make the LexiconHash for
+	 * @param index Index to make the LexiconHash for
+	 * @deprecated use optimise instead
 	 */
 	public static void createLexiconHash(final Index index) throws IOException
 	{
-		LexiconBuilder.createLexiconHash((LexiconInputStream)index.getIndexStructureInputStream("lexicon"),
-			index.getPath(),index.getPrefix());
+		optimise(index, "lexicon");
 	}
 	
-	/**
-	 * Creates a Lexicon hash. This method reads the lexicon and finds the entries which 
-	 * start with a different letter. The offset of these entries
-	 * is used to speed up the binary search performed during retrieval.
-	 * These offsets are saved to a lex hash file beside the Lexicon in the Index.
-	 * @param lexStream LexiconInputStream to process
-	 * @param path Path to the index containing the lexicon
-	 * @param prefix Prefix of the index containing the lexicon
-	 */
-	public static void createLexiconHash(final LexiconInputStream lexStream, final String path, final String prefix) {
-		String filename = path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICON_HASH_SUFFIX;
-		try{
-			createLexiconHash(lexStream, Files.writeFileStream(filename));
-		} catch(IOException ioe) {
-			logger.error("IOException while creating hash file in LexiconBuilder.createLexiconHash: " + ioe);
-		}
-	}
 	
-	public static void createLexiconHash(final LexiconInputStream lexStream, OutputStream out)
+	/** Optimises the lexicon, eg lexid file */
+	public static void optimise(final Index index, final String structureName)
 	{
-		TIntObjectHashMap map = new TIntObjectHashMap();
-		int previousFirstChar = -1;
-		int firstChar = 0;
-		int counter = -1;
-
 		try {
-			//read all the terms in the lexicon and 
-			//mark the offset of the ones that start
-			//with a different character from the 
-			//previous entry.
-			while (lexStream.readNextEntry()!=-1) {
-				firstChar = lexStream.getTerm().charAt(0);
-				if (firstChar!=previousFirstChar) {
-					int[] boundaries = new int[] {counter, 0};
-					map.put(firstChar, boundaries);
-					previousFirstChar = firstChar;
-				}
-				counter++;
-			}
-			lexStream.close();
-
-	
-			//NB: map should not be too large, say 26+10, more if UTF characters			
-			
-			// after reading all the entries, update the upper 
-			// boundary, which is zero from the previous step.
-			int[] mapKeys = map.keys();
-			Arrays.sort(mapKeys);
-			final int mapKeysSize = mapKeys.length;
-			for (int i=0; i<mapKeysSize-1; i++) {
-				int nextLowerBoundary = ((int[])map.get(mapKeys[i+1]))[0];
-				int[] currentBoundaries = (int[])map.get(mapKeys[i]);
-				currentBoundaries[1] = nextLowerBoundary;
-				map.put(mapKeys[i], currentBoundaries);
-			}
-			//do something about the last entry
-			int nextLowerBoundary = counter;
-			int[] currentBoundaries = (int[])map.get(mapKeys[mapKeysSize-1]);
-			currentBoundaries[1] = nextLowerBoundary;
-			map.put(mapKeys[mapKeysSize-1], currentBoundaries);
-			ObjectOutputStream oos = new ObjectOutputStream(out);
-			oos.writeObject(map);
-			oos.close();
-			//logger.debug("Wrote lexicon hash to "+ filename);	
+			logger.info("Optimising structure "+structureName);
+			CollectionStaticticsCounter<LexiconEntry> counter = new BasicLexiconCollectionStaticticsCounter(index);
+			FSOMapFileLexicon.optimise(structureName, index, counter);
+			counter.close();
 		} catch(IOException ioe) {
-			logger.error("IOException while reading the lexicon in LexiconBuilder.createLexiconHash: " + ioe);
-		}
-	}
-
-	public static void main(String args[])
-	{
-		boolean USE_UTF = Boolean.parseBoolean(ApplicationSetup.getProperty("string.use_utf", "false"));
-		
-		try{
-			if ((args.length == 3||args.length ==4  )&& args[0].equals("--createlexiconindex"))
-			{
-				if (USE_UTF)
-					createLexiconIndex(
-							new UTFLexiconInputStream(args[1], args[2]),
-							args.length == 4
-								? Integer.parseInt(args[3])
-								: Lexicon.numberOfEntries(args[1] + ApplicationSetup.FILE_SEPARATOR + args[2] + ApplicationSetup.LEXICONSUFFIX),
-							Lexicon.lexiconEntryLength,
-							args[1], args[2]);
-				else	
-					createLexiconIndex(
-							new LexiconInputStream(args[1], args[2]),
-							args.length == 4
-							? Integer.parseInt(args[3])
-									: Lexicon.numberOfEntries(args[1] + ApplicationSetup.FILE_SEPARATOR + args[2] + ApplicationSetup.LEXICONSUFFIX),
-									Lexicon.lexiconEntryLength,
-									args[1], args[2]);
-			}
-			else if (args.length == 3 && args[0].equals("--createlexiconhash"))
-			{
-				if (USE_UTF)
-					createLexiconHash( new UTFLexiconInputStream(args[1], args[2]), args[1], args[2]);
-				else
-					createLexiconHash( new LexiconInputStream(args[1], args[2]), args[1], args[2]);
-			}
-			else
-			{
-				logger.fatal("Usage: uk.ac.gla.terrier.indexing.structures.LexiconBuilder {--createlexiconindex|--createlexiconhash} /path/to/index fileprefix [numEntries]");
-				logger.fatal("Exiting ...");
-				System.exit(0);
-			}
-		} catch (IOException ioe) {
-			logger.error("IOException while building lexicon index : ",ioe);
-			
+			logger.error("IOException while creating optimising lexicon called " + structureName, ioe);
 		}
 	}
 
 
 	/** return the lexicon input stream for the current index at the specified filename */	
-	protected LexiconInputStream getLexInputStream(String filename)
+	@SuppressWarnings("unchecked")
+	protected Iterator<Map.Entry<String,LexiconEntry>> getLexInputStream(String structureName) throws IOException
 	{
-		LexiconInputStream li = null;
-		try{
-			li = (LexiconInputStream) lexiconInputStream.getConstructor(String.class).newInstance(filename);
-		} catch (Exception e) {
-			logger.error("Problem loading a LexiconInputStream", e);
-		}
-		return li;
+		return new FSOMapFileLexicon.MapFileLexiconIterator(structureName, index.getPath(), index.getPrefix(), 
+				(FixedSizeWriteableFactory<Text>)index.getIndexStructure(defaultStructureName+"-keyfactory"), 
+				(FixedSizeWriteableFactory<LexiconEntry>)index.getIndexStructure(defaultStructureName+"-valuefactory"));
 	}
 
 	/** return the lexicon outputstream or the current index at the specified filename */
-	protected LexiconOutputStream getLexOutputStream(String filename)
+	@SuppressWarnings("unchecked")
+	protected LexiconOutputStream<String> getLexOutputStream(String structureName) throws IOException
 	{
-		LexiconOutputStream lo = null;
-		try{
-			lo = (LexiconOutputStream) lexiconOutputStream.getConstructor(String.class).newInstance(filename);
-		} catch (Exception e) {
-			logger.error("Problem loading a LexiconOutputStream", e);
-		}
-		return lo;
+		return new FSOMapFileLexiconOutputStream(
+				index.getPath(), index.getPrefix(), 
+				structureName, 
+				(FixedSizeWriteableFactory<Text>)index.getIndexStructure(defaultStructureName+"-keyfactory"));
 	}
 
 }
Index: src/uk/ac/gla/terrier/structures/indexing/InvertedIndexBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/InvertedIndexBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/InvertedIndexBuilder.java	(working copy)
@@ -33,15 +33,22 @@
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
 
+import org.apache.hadoop.io.Text;
 import org.apache.log4j.Logger;
+
 import uk.ac.gla.terrier.compression.BitOut;
 import uk.ac.gla.terrier.compression.BitOutputStream;
+import uk.ac.gla.terrier.structures.Closeable;
 import uk.ac.gla.terrier.structures.DirectIndexInputStream;
-import uk.ac.gla.terrier.structures.DocumentIndex;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexicon;
+import uk.ac.gla.terrier.structures.FSOMapFileLexiconOutputStream;
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
 import uk.ac.gla.terrier.utility.Files;
@@ -84,8 +91,6 @@
  */
 public class InvertedIndexBuilder {
 
-	/** class to be used as a lexiconinputstream. set by this and child classes */
-	protected Class lexiconInputStream = null;
 	/** class to be used as a lexiconoutpustream. set by this and child classes */
 	protected Class lexiconOutputStream = null;
 
@@ -104,13 +109,6 @@
 		}
 	}
 
-	/** the directory in which index files should be created */
-	protected String indexPath;
-	/** the first part of the filename component of index files */
-	protected String indexPrefix;
-	
-	protected String indexPathPrefix;
-	
 	/** The number of unique terms in the vocabulary.*/
 	public int numberOfUniqueTerms;
 	
@@ -128,6 +126,8 @@
 	
 	protected Index index = null;
 	
+	protected String structureName = null;
+	
 	/** The number of pointers to be processed in an interation. This directly corresponds to the
 	  * property <tt>invertedfile.processpointers</tt>. If this property is set and > 0, then each
 	  * iteration of the inverted index creation will be done to a set number of pointers, not a set
@@ -140,62 +140,20 @@
 	 */
 	protected BitOut file;
 
-	/**
-	 * Constructor of the class InvertedIndex.
-	 * @deprecated
-	 */
-	public InvertedIndexBuilder(String Path, String Prefix)
-	{
-		indexPath = Path; indexPrefix = Prefix;
-		indexPathPrefix = indexPath + ApplicationSetup.FILE_SEPARATOR + indexPrefix;
-		try{
-			file = new BitOutputStream(indexPathPrefix + ApplicationSetup.IFSUFFIX);
-		} catch (IOException ioe) {
-			logger.error("creating BitOutputStream for writing the inverted file : ", ioe);
-		}
-		lexiconInputStream = LexiconInputStream.class;
-		lexiconOutputStream = LexiconOutputStream.class;
-	}
-	
-	public InvertedIndexBuilder(Index i)
+	public InvertedIndexBuilder(Index i, String _structureName)
 	{
 		this.index = i;
-		indexPath = index.getPath(); indexPrefix = index.getPrefix();
-		indexPathPrefix = indexPath + ApplicationSetup.FILE_SEPARATOR + indexPrefix;
+		this.structureName = _structureName;
+		
 		try{
-			file = new BitOutputStream(indexPathPrefix + ApplicationSetup.IFSUFFIX);
+			file = new BitOutputStream(index.getPath() + "/"+ index.getPrefix() + "." +structureName + ".bf");
 		} catch (IOException ioe) {
 			logger.error("creating BitOutputStream for writing the inverted file : ", ioe);
 		}
-		lexiconInputStream = LexiconInputStream.class;
 		lexiconOutputStream = LexiconOutputStream.class;
 	}
 
 
-	/**
-	 * A default constructor of the class InvertedIndex.
-	 * @deprecated
-	 */
-	public InvertedIndexBuilder() {
-		this(ApplicationSetup.TERRIER_INDEX_PATH,
-			ApplicationSetup.TERRIER_INDEX_PREFIX);
-	}
-
-	/**
-	 * Creates an instance of the InvertedIndex
-	 * class using the given filename.
-	 * @param filename The name of the inverted file
-	 * @deprecated Use this() or this(String, String)
-	 */
-	public InvertedIndexBuilder(String filename) {
-		try{
-			file = new BitOutputStream(filename);
-		} catch (IOException ioe) {
-			logger.error("Creating BitOutputStream for writing the direct file : ", ioe);
-		}
-		lexiconInputStream = LexiconInputStream.class;
-		lexiconOutputStream = LexiconOutputStream.class;
-	}
 
 	/**
 	 * Closes the underlying bit file.
@@ -208,21 +166,22 @@
 	 * Creates the inverted index using the already created direct index,
 	 * document index and lexicon.
 	 */
+	@SuppressWarnings("unchecked")
 	public void createInvertedIndex() {
 		try {
 			Runtime r = Runtime.getRuntime();
 			logger.debug("creating inverted index");
-			final String LexiconFilename = indexPathPrefix + ApplicationSetup.LEXICONSUFFIX;
+			final String LexiconFilename = index.getPath() + "/" + index.getPrefix() + ".lexicon";
 			
 			final int numberOfDocuments = index.getCollectionStatistics().getNumberOfDocuments();
 		
 			long assumedNumberOfPointers = Long.parseLong(index.getIndexProperty("num.Pointers", "0"));				
 			long numberOfTokens = 0;
 			long numberOfPointers = 0;
+			int numberOfUniqueTerms = index.getLexicon().numberOfEntries();
+			Iterator<Map.Entry<String,LexiconEntry>> lexiconStream = 
+				(Iterator<Map.Entry<String,LexiconEntry>>)index.getIndexStructureInputStream("lexicon");
 		
-			LexiconInputStream lexiconStream = getLexInputStream(LexiconFilename);
-			numberOfUniqueTerms = lexiconStream.numberOfEntries();
-			final int fieldsCount = FieldScore.FIELDS_COUNT;
 			//A temporary file for storing the updated lexicon file, after
 			// creating the inverted file
 			DataOutputStream dos = new DataOutputStream(Files.writeFileStream(LexiconFilename.concat(".tmp2")));
@@ -374,52 +333,56 @@
 			this.numberOfUniqueTerms = numberOfUniqueTerms;
 			this.numberOfPointers = numberOfPointers;
 
-			lexiconStream.close();
+			if (lexiconStream instanceof Closeable) {
+				((Closeable)lexiconStream).close();
+			}
 			dos.close();
 			//finalising the lexicon file with the updated information
 			//on the frequencies and the offsets
-			
 			//reading the original lexicon
-			LexiconInputStream lis = getLexInputStream(LexiconFilename);
+			lexiconStream = (Iterator<Map.Entry<String,LexiconEntry>>)index.getIndexStructureInputStream("lexicon");
+			
 			
 			//the updated lexicon
-			LexiconOutputStream los = getLexOutputStream(LexiconFilename.concat(".tmp3"));
+			LexiconOutputStream<String> los = getLexOutputStream("tmplexicon");
 			
 			//the temporary data containing the offsets
 			DataInputStream dis = new DataInputStream(Files.openFileStream(LexiconFilename.concat(".tmp2")));
 			
-			while (lis.readNextEntryBytes() != -1) {
-				los.writeNextEntry(lis.getTermCharacters(), lis.getTermId(),
-						lis.getNt(), 
-						dis.readInt(), //the term frequency
-						dis.readLong(), //end byte offset
-						dis.readByte());//end bit offset
+			while(lexiconStream.hasNext())
+			{
+				Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
+				LexiconEntry value = lee.getValue();
+				value.setPosition(dis.readLong(), dis.readByte());
+				los.writeNextEntry(lee.getKey(), value);
 			}
-			lis.close();
+			if (lexiconStream instanceof Closeable) {
+				((Closeable)lexiconStream).close();
+			}
 			los.close();
 			dis.close();
-            if (! Files.delete(LexiconFilename))
-                logger.error("delete file .lex failed!");
-            if (! Files.delete(LexiconFilename.concat(".tmp2")))
-                logger.error("delete file .lex.tmp2 failed!");
-            if (! Files.rename(LexiconFilename.concat(".tmp3"), LexiconFilename))
-                logger.error("rename file .lex.tmp3 to .lex failed!");
+			Files.delete(LexiconFilename.concat(".tmp2"));
+			FSOMapFileLexicon.deleteMapFileLexicon("lexicon", index.getPath(), index.getPrefix());
+			FSOMapFileLexicon.renameMapFileLexicon(
+					"tmplexicon", index.getPath(), index.getPrefix(), 
+					"lexicon", index.getPath(), index.getPrefix());
 			
 			index.addIndexStructure(
-					"inverted", 
+					structureName, 
 					"uk.ac.gla.terrier.structures.InvertedIndex", 
-					"uk.ac.gla.terrier.structures.Lexicon,java.lang.String,java.lang.String", 
-					"lexicon,path,prefix");
+					"uk.ac.gla.terrier.structures.Index,java.lang.String", 
+					"index,structureName");
 			index.addIndexStructureInputStream(
-                    "inverted",
+					structureName,
                     "uk.ac.gla.terrier.structures.InvertedIndexInputStream",
-                    "java.lang.String,java.lang.String,uk.ac.gla.terrier.structures.LexiconInputStream",
-                    "path,prefix,lexicon-inputstream");
+                    "uk.ac.gla.terrier.structures.Index,java.lang.String,java.util.Iterator",
+                    "index,structureName,lexicon-inputstream");
 			index.setIndexProperty("num.inverted.fields.bits", ""+FieldScore.FIELDS_COUNT );
 			//should be already set, but in case their not
 			index.setIndexProperty("num.Terms", ""+numberOfUniqueTerms);
 			index.setIndexProperty("num.Tokens", ""+numberOfTokens);
 			index.setIndexProperty("num.Pointers", ""+numberOfPointers);
+			index.flush();
 			System.gc();
 			
 		} catch (IOException ioe) {
@@ -427,6 +390,16 @@
 		}
 	}
 	
+	protected TIntArrayList[] createPointerForTerm(LexiconEntry le)
+	{
+		TIntArrayList[] tmpArray = new TIntArrayList[3];
+		final int tmpNT = le.getDocumentFrequency();
+		tmpArray[0] = new TIntArrayList(tmpNT);
+		tmpArray[1] = new TIntArrayList(tmpNT);
+		tmpArray[2] = new TIntArrayList(tmpNT);
+		return tmpArray;
+	}
+	
 	/** Iterates through the lexicon, until it has reached the given number of pointers
 	  * @param PointersToProcess Number of pointers to stop reading the lexicon after
 	  * @param lexiconStream the lexicon input stream to read 
@@ -436,7 +409,7 @@
 	  */
 	protected IntLongTuple scanLexiconForPointers(
 		final long PointersToProcess, 
-		final LexiconInputStream lexiconStream, 
+		final Iterator<Map.Entry<String,LexiconEntry>> lexiconStream, 
 		final TIntIntHashMap codesHashMap,
 		final ArrayList<TIntArrayList[]> tmpStorageStorage)
 		throws IOException
@@ -446,27 +419,21 @@
 		int j=0; //counter of loop iterations
 		while(numberOfPointersThisIteration < PointersToProcess) {
 		
-			if (lexiconStream.readNextEntry() == -1)
+			if (! lexiconStream.hasNext())
 				break;
 			
-			processTerms++;
-			
-			TIntArrayList[] tmpArray = new TIntArrayList[3];
-			final int tmpNT = lexiconStream.getNt();
-			tmpArray[0] = new TIntArrayList(tmpNT);
-			tmpArray[1] = new TIntArrayList(tmpNT);
-			tmpArray[2] = new TIntArrayList(tmpNT);
-			
-			numberOfPointersThisIteration += tmpNT;
-			
+			Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
+			LexiconEntry le = lee.getValue();
 			
-			tmpStorageStorage.add(tmpArray);
+			processTerms++;			
+			numberOfPointersThisIteration += le.getDocumentFrequency();		
+			tmpStorageStorage.add(createPointerForTerm(le));
 			
 			//the class TIntIntHashMap return zero when you look up for a
 			//the value of a key that does not exist in the hash map.
 			//For this reason, the values that will be inserted in the 
 			//hash map are increased by one. 
-			codesHashMap.put(lexiconStream.getTermId(), j + 1);
+			codesHashMap.put(le.getTermId(), j + 1);
 			
 			//increment counter
 			j++;
@@ -488,7 +455,7 @@
 	  */
 	protected IntLongTuple scanLexiconForTerms(
 		final int processTerms, 
-		final LexiconInputStream lexiconStream, 
+		final Iterator<Map.Entry<String,LexiconEntry>> lexiconStream, 
 		final TIntIntHashMap codesHashMap,
 		TIntArrayList[][] tmpStorage)
 		throws IOException
@@ -498,11 +465,14 @@
 		long numberOfPointersThisIteration = 0;
 		for (; j < processTerms; j++) {
 		
-			if (lexiconStream.readNextEntry() == -1)
+			if (! lexiconStream.hasNext())
 				break;
 		
+			Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
+			LexiconEntry le = lee.getValue();
+		
 			TIntArrayList[] tmpArray = new TIntArrayList[3];
-			final int tmpNT = lexiconStream.getNt();
+			final int tmpNT = le.getDocumentFrequency();
 			tmpArray[0] = new TIntArrayList(tmpNT);
 			tmpArray[1] = new TIntArrayList(tmpNT);
 			tmpArray[2] = new TIntArrayList(tmpNT);
@@ -516,7 +486,7 @@
 			//the value of a key that does not exist in the hash map.
 			//For this reason, the values that will be inserted in the 
 			//hash map are increased by one. 
-			codesHashMap.put(lexiconStream.getTermId(), j + 1);
+			codesHashMap.put(le.getTermId(), j + 1);
 		}
 		if (logger.isDebugEnabled())
 			logger.debug(
@@ -538,10 +508,7 @@
 		throws IOException 
 	{
 		//scan the direct file
-		DirectIndexInputStream directInputStream =
-			index != null
-				? (DirectIndexInputStream)index.getIndexStructureInputStream("direct")
-				: new DirectIndexInputStream(indexPath, indexPrefix);
+		DirectIndexInputStream directInputStream = (DirectIndexInputStream)index.getIndexStructureInputStream("direct");
 		int[][] documentTerms = null;
 		int p = 0; //a document counter;
 		final boolean useFieldInformation = this.useFieldInformation;
@@ -582,8 +549,7 @@
 	protected void traverseDirectFile(int[][][] tmpStorage, int[] indices, TIntIntHashMap codesHashMap) 
 		throws IOException
 	{
-		DirectIndexInputStream directInputStream = new DirectIndexInputStream(
-			indexPath, indexPrefix);
+		DirectIndexInputStream directInputStream = (DirectIndexInputStream)index.getIndexStructureInputStream("direct");
 		int[][] documentTerms = null;
 		int[] documentTerms0 = null;
 		int[] documentTerms1 = null;
@@ -652,14 +618,13 @@
 		throws IOException
 	{
 		//write to the inverted file. We should note that the lexicon 
-		//file should be updated as well with the term frequency and
-		//the endOffset and endBitOffset.
-		
-		//remove this, as it now happens at the end of this method
-		//the first call is made at the start of createInvertedIndex
-		//file.writeReset();
+		//should be updated with the start bit and byte offset for this
+		//set of postings.
 		int frequency; long numTokens = 0;
 		for (int j = 0; j < processTerms; j++) {
+			dos.writeLong(file.getByteOffset());
+			dos.writeByte(file.getBitOffset());
+			
 			frequency = 0; //the term frequency
 			TIntArrayList[] tmpMatrix = tmpStorage[j];
 			final int[] tmpMatrix0 = tmpMatrix[0].toNativeArray();
@@ -705,21 +670,17 @@
 				}
 			}
 			
-			long endOffset = file.getByteOffset();
-			byte endBitOffset = file.getBitOffset();
-			endBitOffset--;
-			if (endBitOffset < 0 && endOffset > 0) {
-				endBitOffset = 7;
-				endOffset--;
-			}
+			//long endOffset = file.getByteOffset();
+			//byte endBitOffset = file.getBitOffset();
+			//endBitOffset--;
+			//if (endBitOffset < 0 && endOffset > 0) {
+			//	endBitOffset = 7;
+			//	endOffset--;
+			//}
 			numTokens += frequency;
-			dos.writeInt(frequency);
-			dos.writeLong(endOffset);
-			dos.writeByte(endBitOffset);
+			//dos.writeInt(frequency);
+			
 		}
-		//file.writeFlush();
-		//we have to force a reset here, as otherwise the buffer isn't cleared.
-		//file.writeReset();
 		return numTokens;
 	}
 	
@@ -734,124 +695,6 @@
 	 */
 	protected int processTerms = Integer.parseInt(ApplicationSetup.getProperty("invertedfile.processterms", "75000"));
 	
-	/*
-	for (int i = 0; i < numberOfUniqueTerms; i = i + processTerms) {
-		//set the number of terms to process from the lexicon
-		if ((i + processTerms) > numberOfUniqueTerms)
-			processTerms = (int) numberOfUniqueTerms - i;
-		//start processing part of the lexicon
-		startProcessingLexicon = System.currentTimeMillis();
-		//preparing the data structures to store the data
-		int[] indices = new int[processTerms];
-		int[][][] tmpStorage = new int[processTerms][][];
-		TIntIntHashMap codesHashMap = new TIntIntHashMap(processTerms);
-		int numberOfPointersPerIteration = 0;
-	
-		int numOfFields = 2;
-		if (useFieldInformation)
-			numOfFields = 3;
-	
-		for (int j = 0; j < processTerms; j++) {
-			lexiconStream.readNextEntry();
-			//int[][] tmpArray = new int[numOfFields][lexiconStream.getNt()];
-			numberOfPointersPerIteration += lexiconStream.getNt();
-			//tmpStorage.add(tmpArray);
-			tmpStorage[j] = new int[numOfFields][lexiconStream.getNt()];
-			//the class TIntIntHashMap return zero when you look up for
-			// a the value of a key that does not exist in the hash map.
-			//For this reason, the values that will be inserted in the
-			//hash map are increased by one.
-			codesHashMap.put(lexiconStream.getTermId(), j + 1);
-		}
-		numberOfPointers += numberOfPointersPerIteration;
-		endProcessingLexicon = System.currentTimeMillis();
-		startTraversingDirectFile = System.currentTimeMillis();
-		//scan the direct file
-		//uses indices, tmpStorage and codesHashMap
-		traverseDirectFile(tmpStorage, indices, codesHashMap);
-		//end of traversing the
-		endTraversingDirectFile = System.currentTimeMillis();
-		startWritingInvertedFile = System.currentTimeMillis();
-		//write to the inverted file. We should note that the lexicon
-		//file should be updated as well with the term frequency and
-		//the endOffset and endBitOffset.
-		//file.writeReset();
-		int frequency;
-		int[][] tmpMatrix = null;
-		int[] tmpMatrix0 = null;
-		int[] tmpMatrix1 = null;
-	
-		for (int j = 0; j < processTerms; j++) {
-			frequency = 0; //the term frequency
-			//tmpMatrix = (int[][]) tmpStorage.elementAt(j);
-			tmpMatrix = tmpStorage[j];
-			tmpMatrix0 = tmpMatrix[0];
-			tmpMatrix1 = tmpMatrix[1];
-	
-			//we do not need to sort because the documents are read in
-			//order of docid, and therefore the arrays are already
-			// sorted.
-			if (useFieldInformation) {
-				int[] tmpMatrix2 = tmpMatrix[2];
-				//write the first entry
-				file.writeGamma(tmpMatrix0[0] + 1);
-				frequency += tmpMatrix1[0];
-				file.writeUnary(tmpMatrix1[0]);
-				file.writeBinary(fieldsCount, tmpMatrix2[0]);
-				final int tmpMatrix0Length = tmpMatrix0.length;
-				for (int k = 1; k < tmpMatrix0Length; k++) {
-					file.writeGamma(tmpMatrix0[k] - tmpMatrix0[k - 1]);
-					frequency += tmpMatrix1[k];
-					file.writeUnary(tmpMatrix1[k]);
-					file.writeBinary(fieldsCount, tmpMatrix2[k]);
-				}
-			} else {
-				//write the first entry
-				file.writeGamma(tmpMatrix0[0] + 1);
-				frequency += tmpMatrix1[0];
-				file.writeUnary(tmpMatrix1[0]);
-				final int tmpMatrix0Length = tmpMatrix0.length;
-				for (int k = 1; k < tmpMatrix0Length; k++) {
-					file.writeGamma(tmpMatrix0[k] - tmpMatrix0[k - 1]);
-					frequency += tmpMatrix1[k];
-					file.writeUnary(tmpMatrix1[k]);
-				}
-			}
-	
-			long endOffset = file.getByteOffset();
-			byte endBitOffset = file.getBitOffset();
-			endBitOffset--;
-			if (endBitOffset < 0 && endOffset > 0) {
-				endBitOffset = 7;
-				endOffset--;
-			}
-			numberOfTokens += frequency;
-			dos.writeInt(frequency);
-			dos.writeLong(endOffset);
-			dos.writeByte(endBitOffset);
-		}
-		//file.writeFlush();
-		endWritingInvertedFile = System.currentTimeMillis();
-	
-		System.err.println("time to process part of lexicon: "
-			+ ((endProcessingLexicon - startProcessingLexicon) / 1000D));
-		System.err.println("time to traverse direct file: "
-			+ ((endTraversingDirectFile - startTraversingDirectFile) / 1000D));
-		System.err.println("time to write inverted file: "
-			+ ((endWritingInvertedFile - startWritingInvertedFile) / 1000D));
-		System.err.println("time to perform one iteration: "
-			+ ((endWritingInvertedFile - startProcessingLexicon) / 1000D));
-		System.err.println("number of pointers processed: "
-			+ numberOfPointersPerIteration);
-		
-		indices = null;
-		tmpStorage  = null; 
-		codesHashMap.clear(); 
-		codesHashMap = null;
-	
-	}
-	*/
-	
 	public static void displayMemoryUsage(Runtime r)
 	{
 		if (logger.isDebugEnabled())
@@ -862,26 +705,14 @@
 		);
 	}
 
-	public LexiconInputStream getLexInputStream(String filename)
-	{
-		LexiconInputStream li = null;
-		try{
-			li = (LexiconInputStream) lexiconInputStream.getConstructor(String.class).newInstance(filename);
-		} catch (Exception e) {
-			logger.error("Problem loading a LexiconInputStream", e);
-		}
-		return li;
-	}
 
-	public LexiconOutputStream getLexOutputStream(String filename)
+	@SuppressWarnings("unchecked")
+	protected LexiconOutputStream<String> getLexOutputStream(String structureName) throws IOException
 	{
-		LexiconOutputStream lo = null;
-		try{
-			lo = (LexiconOutputStream) lexiconOutputStream.getConstructor(String.class).newInstance(filename);
-		} catch (Exception e) {
-			logger.error("Problem loading a LexiconOutputStream", e);
-		}
-		return lo;
+		return new FSOMapFileLexiconOutputStream(
+				index.getPath(), index.getPrefix(), 
+				structureName, 
+				(FixedSizeWriteableFactory<Text>)index.getIndexStructure("lexicon-keyfactory"));
 	}
 
 }
Index: src/uk/ac/gla/terrier/structures/indexing/UTFLexiconBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/UTFLexiconBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/UTFLexiconBuilder.java	(working copy)
@@ -1,136 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is UTFLexiconBuilder.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
- */
-package uk.ac.gla.terrier.structures.indexing;
-
-import java.io.IOException;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFLexicon;
-import uk.ac.gla.terrier.structures.UTFLexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconOutputStream;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-/**
- * Builds temporary lexicons during indexing a collection and
- * merges them when the indexing of a collection has finished.
- * @author Craig Macdonald &amp; Vassilis Plachouras 
- * @version $Revision: 1.16 $
- */
-public class UTFLexiconBuilder extends LexiconBuilder
-{
-	private static Logger logger = Logger.getRootLogger();
-	/**
-	 * A default constructor of the class. The lexicon is built in the 
-	 * default path and file: ApplicationSetup.TERRIER_INDEX_PATH and 
-	 * ApplicationSetup.TERRIER_INDEX_PREFIX respectively.
-	 * @deprecated
-	 */
-	public UTFLexiconBuilder() 	{
-		super();
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		lexiconInputStream = UTFLexiconInputStream.class;
-	}
-
-	/** 
-	 * Creates an instance of the class, given the path
-	 * to save the temporary lexicons.
-	 * @param pathname String the path to save the temporary and final lexicons.
-	 * @param prefix String the filename component of the lexicons
-	 */
-	public UTFLexiconBuilder(String pathname, String prefix) {
-		super(pathname, prefix);
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		lexiconInputStream = UTFLexiconInputStream.class;
-	}
-	
-	public UTFLexiconBuilder(Index i)
-	{
-		super(i);
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		lexiconInputStream = UTFLexiconInputStream.class;
-	}
-
-	
-	/** 
-	 * Processing the lexicon after finished creating the 
-	 * direct and document indexes.
-	 */
-	public void finishedDirectIndexBuild()
-	{
-		if(logger.isInfoEnabled()){
-			logger.info("flushing lexicon to disk after the direct index completed");
-		}
-		 //only write a temporary lexicon if there are any items in it
-		if (TempLex.getNumberOfNodes() > 0)
-			writeTemporaryLexicon();
-		TempLex = null;
-
-		//merges the temporary lexicons
-		if (tempLexFiles.size() > 0)
-		{
-			try{
-				merge(tempLexFiles);
-				
-				//creates the offsets file
-				final String lexiconFilename = 
-					indexPath + ApplicationSetup.FILE_SEPARATOR + 
-					indexPrefix + ApplicationSetup.LEXICONSUFFIX;
-				LexiconInputStream lis = getLexInputStream(lexiconFilename);
-				createLexiconIndex(
-					lis,
-					lis.numberOfEntries(),
-					UTFLexicon.lexiconEntryLength
-					); 
-				TermCount = lis.numberOfEntries();	
-				if (index != null)
-				{
-					index.addIndexStructure("lexicon", "uk.ac.gla.terrier.structures.UTFLexicon");
-					index.addIndexStructureInputStream("lexicon", "uk.ac.gla.terrier.structures.UTFLexiconInputStream");
-					index.setIndexProperty("num.Terms", ""+lis.numberOfEntries());
-		   			index.setIndexProperty("num.Pointers", ""+lis.getNumberOfPointersRead());
-				}
-		
-			} catch (IOException ioe) {
-				logger.error("Indexing failed to write a lexicon index file to disk", ioe);
-			}
-		}
-		else
-			logger.warn("No temporary lexicons to merge, skipping");
-	}
-
-	@Override
-	public void finishedInvertedIndexBuild() {
-		super.finishedInvertedIndexBuild();
-		if (index != null)
-		{
-			index.addIndexStructure("lexicon", "uk.ac.gla.terrier.structures.UTFLexicon");
-		}
-	}
-	
-}
Index: src/uk/ac/gla/terrier/structures/indexing/DirectIndexBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/DirectIndexBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/DirectIndexBuilder.java	(working copy)
@@ -46,9 +46,6 @@
 	/** The gamma compressed file containing the terms. */
 	protected BitOut file;
 
-	/** The number of documents to be indexed before flushing the data to disk.*/
-	protected static final int DocumentsPerFlush = ApplicationSetup.BUNDLE_SIZE;
-
 	/** The number of different fields that are used for indexing field information.*/
 	protected static final int fieldTags = FieldScore.FIELDS_COUNT;
 
@@ -121,13 +118,6 @@
 		/* find out where we are */
 		FilePosition rtr = getLastEndOffset();
 		
-		/* flush to disk if necessary */
-		if (DocumentsSinceFlush++ >= DocumentsPerFlush)
-		{
-			flushBuffer();
-			resetBuffer();
-			DocumentsSinceFlush = 0;
-		}
 		/* and then return where the position of the last 
 		 * write to the DirectIndex */
 		return rtr;
@@ -177,8 +167,6 @@
 	 */
 	public void finishedCollections()
 	{
-		flushBuffer();
-		resetBuffer();
 		DocumentsSinceFlush = 0;
 		logger.info("flush direct index");
 		try{
@@ -199,13 +187,7 @@
 			index.setIndexProperty("num.direct.fields.bits", ""+fieldTags);
 		}
 	}
-	/** 
-	 * Flushes the data to disk.
-	 * @deprecated since 2.0
-	 */
-	public void flushBuffer() {
-		//file.flush();
-	}
+
 	/** 
 	 * Returns the current offset in the direct index.
 	 * @return FilePosition the offset in the direct index.
@@ -224,13 +206,7 @@
 	
 		return new FilePosition(endByte, endBit);
 	}
-	/**
-	 * Resets the internal buffer for writing data. This method should
-	 * be called before adding any documents to the direct index.
-	 */
-	public void resetBuffer() {
-		//file.writeReset();
-	}
+	
 	/**
 	 * Closes the underlying gamma compressed file.
 	 */
Index: src/uk/ac/gla/terrier/structures/indexing/BlockLexiconBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/BlockLexiconBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/BlockLexiconBuilder.java	(working copy)
@@ -26,299 +26,18 @@
  *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk> 
  */
 package uk.ac.gla.terrier.structures.indexing;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.PriorityQueue;
-
-import uk.ac.gla.terrier.structures.BlockLexiconInputStream;
-import uk.ac.gla.terrier.structures.BlockLexiconOutputStream;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.Lexicon;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
-import uk.ac.gla.terrier.structures.LexiconOutputStream;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
 /**
  * Builds a block lexicon using block frequencies.
- * @author Douglas Johnson, Vassilis Plachouras &amp; Craig Macdonald
+ * @author Craig Macdonald
  * @version $Revision: 1.32 $
  */
 public class BlockLexiconBuilder extends LexiconBuilder
 {
-	
-	
-	/**
-	 * A default constructor of the class. The block lexicon is built in the 
-	 * default path and file: ApplicationSetup.TERRIER_INDEX_PATH and 
-	 * ApplicationSetup.TERRIER_INDEX_PREFIX respectively.
-	 */
-	public BlockLexiconBuilder()
-	{
-		this(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
-	}
-
-	/**
-	 * Creates an instance of the class, given the path
-	 * to save the final and temporary lexicons.
-	 * @param pathname String the path to save the temporary lexicons.
-	 */
-	public BlockLexiconBuilder(String pathname, String prefix) {
-		super(pathname, prefix);
-		LexiconMapClass = BlockLexiconMap.class;
-		lexiconOutputStream = BlockLexiconOutputStream.class;
-		lexiconInputStream = BlockLexiconInputStream.class;
-		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-	}
-	
-	public BlockLexiconBuilder(Index i)
-	{
-		super(i);
-		LexiconMapClass = BlockLexiconMap.class;
-		lexiconOutputStream = BlockLexiconOutputStream.class;
-		lexiconInputStream = BlockLexiconInputStream.class;
-		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-	}
-
-	/**
-	 * The method that performs processing of the lexicon after the
-	 * creation of the direct index has been completed. It flushes to 
-	 * disk the current temporary lexicon, and it starts the merging
-	 * of the temporary lexicons and the creation of the lexicon index. 
-	 */
-	public void finishedDirectIndexBuild()
-	{
-		logger.info("flushing block lexicon to disk after the direct index completed");
-		 //only write a temporary lexicon if there are any items in it
-		if (TempLex.getNumberOfNodes() > 0)
-			writeTemporaryLexicon();
-		TempLex = null;
-
-		//merges the temporary lexicons
-		if (tempLexFiles.size() > 0)
-			try{
-				merge(tempLexFiles);
-	
-				//creates the offsets file
-				final String lexiconFilename = 
-							indexPath + ApplicationSetup.FILE_SEPARATOR + 
-							indexPrefix + ApplicationSetup.LEXICONSUFFIX;
-				LexiconInputStream lis = getLexInputStream(lexiconFilename);
-				createLexiconIndex(
-						lis,
-						lis.numberOfEntries(),
-						/* after inverted index is built, the lexicon will be transformed into a
-						 * normal lexicon, without block frequency */
-						Lexicon.lexiconEntryLength 
-						);
-				TermCount = lis.numberOfEntries();
-				if (index != null)
-				{
-					index.addIndexStructure("lexicon", "uk.ac.gla.terrier.structures.BlockLexicon");
-					index.addIndexStructureInputStream("lexicon", "uk.ac.gla.terrier.structures.BlockLexiconInputStream");
-					index.setIndexProperty("num.Terms", ""+lis.numberOfEntries());
-					index.setIndexProperty("num.Pointers", ""+lis.getNumberOfPointersRead());
-				}
-			} catch(IOException ioe){
-				logger.error("Indexing failed to merge temporary lexicons to disk. ",ioe);
-			}
-		else
-			logger.warn("No temporary lexicons to merge, skipping");
-	}
-
-	/** Merge the two LexiconInputStreams into the given LexiconOutputStream
-	  * @param lis1 First lexicon to be merged
-	  * @param lis2 Second lexicon to be merged
-	  * @param los Lexion to be merged to
-	  */
-	protected void mergeTwoLexicons(
-			LexiconInputStream blis1,
-			LexiconInputStream blis2,
-			LexiconOutputStream blos) throws IOException
+	public BlockLexiconBuilder(Index i, String _structureName)
 	{
-		final BlockLexiconInputStream lis1 = (BlockLexiconInputStream)blis1;
-		final BlockLexiconInputStream lis2 = (BlockLexiconInputStream)blis2;
-		final BlockLexiconOutputStream los = (BlockLexiconOutputStream)blos;
-
-		boolean hasMore1 = true;
-		boolean hasMore2 = true;
-		int termID1 = 0;
-		int termID2 = 0;
-		hasMore1 = (lis1.readNextEntry()!=-1);
-		hasMore2 = (lis2.readNextEntry()!=-1);
-		String sTerm1 = null;
-		String sTerm2 = null;
-		if (hasMore1) {
-			termID1 = lis1.getTermId();
-			sTerm1 = lis1.getTerm();
-		}
-		if (hasMore2) {
-			termID2 = lis2.getTermId();
-			sTerm2 = lis2.getTerm();
-		}
-		while (hasMore1 && hasMore2) {
-			int compareString = 0;
-			if (termID1 != termID2)
-			{
-				compareString = sTerm1.compareTo(sTerm2);
-				if (compareString == 0)//, but termids don't match
-				{
-					logger.error("Term "+sTerm1+" had two termids ("+ termID1+","+termID2+")");
-				}
-			}
-			
-			if (compareString <0) {
-				los.writeNextEntry(sTerm1, termID1, lis1.getNt(), lis1.getBlockFrequency(), lis1.getTF(), lis1.getEndOffset(), lis1.getEndBitOffset());
-				hasMore1 = (lis1.readNextEntry()!=-1);
-				if (hasMore1) {
-					termID1 = lis1.getTermId();
-					sTerm1 = lis1.getTerm();
-				}
-			} else if (compareString >0) {
-				los.writeNextEntry(sTerm2, termID2, lis2.getNt(), lis2.getBlockFrequency(), lis2.getTF(), lis2.getEndOffset(), lis2.getEndBitOffset());
-				hasMore2 = (lis2.readNextEntry()!=-1);
-				if (hasMore2) {
-					termID2 = lis2.getTermId();
-					sTerm2 = lis2.getTerm();
-				}
-			} else /*if (compareString == 0)*/ {
-				los.writeNextEntry(
-					sTerm1, 
-					termID1, 
-					lis1.getNt() + lis2.getNt(),
-					lis1.getBlockFrequency() + lis2.getBlockFrequency(),
-					lis1.getTF() + lis2.getTF(),  							 
-					0, //inverted index not built yet
-					(byte)0 //inverted index not built yet
-				);
-		
-				hasMore1 = (lis1.readNextEntry()!=-1);
-				hasMore2 = (lis2.readNextEntry()!=-1);
-				if (hasMore1) {
-					termID1 = lis1.getTermId();
-					sTerm1 = lis1.getTerm();
-				}
-				if (hasMore2) {
-					termID2 = lis2.getTermId();
-					sTerm2 = lis2.getTerm();
-				}
-			}
-		}
-		if (hasMore1) {
-			lis2.close();
-
-			while (hasMore1) {
-				los.writeNextEntry(sTerm1, termID1, lis1.getNt(), lis1.getBlockFrequency(), lis1.getTF(), lis1.getEndOffset(), lis1.getEndBitOffset());
-				hasMore1 = (lis1.readNextEntry()!=-1);
-				if (hasMore1) {
-					termID1 = lis1.getTermId();
-					sTerm1 = lis1.getTerm();
-				}
-			}
-
-			//close input file 1 stream
-			lis1.close();
-			
-		} else if (hasMore2) {
-			lis1.close();
-
-			while (hasMore2) {
-				los.writeNextEntry(sTerm2, termID2, lis2.getNt(), lis2.getBlockFrequency(), lis2.getTF(), lis2.getEndOffset(), lis2.getEndBitOffset());
-				hasMore2 = (lis2.readNextEntry()!=-1);
-				if (hasMore2) {
-					termID2 = lis2.getTermId();
-					sTerm2 = lis2.getTerm();
-				}
-			}
-			//close input file 2 stream
-			lis2.close();
-		}
-		//closing ouptut lexicon stream
-		los.close();	
+		super(i, _structureName, 
+			BlockLexiconMap.class, 
+			"uk.ac.gla.terrier.structures.BlockLexiconEntry");
 	}
-	
-	protected void mergeNLexicons(final LexiconInputStream[] _lis, final LexiconOutputStream _los) throws IOException
-	{
-		final int numLexicons = _lis.length;
-		long totalTokens = 0;
-		long totalPointers = 0;
-		final int hasMore[] = new int[numLexicons];
-		Arrays.fill(hasMore, -1);
-		final PriorityQueue<String> terms = new PriorityQueue<String>(numLexicons);
-		final BlockLexiconOutputStream los = (BlockLexiconOutputStream)_los;
-		final BlockLexiconInputStream[] lis = new BlockLexiconInputStream[numLexicons];
-		
-		for(int i=0;i<numLexicons;i++)
-		{
-			lis[i] = (BlockLexiconInputStream) _lis[i];
-			hasMore[i] = lis[i].readNextEntry();
-			terms.add(lis[i].getTerm());	
-		}
-		int Tf = 0; int Nt = 0; int Bf = 0; String targetTerm= null;
-		int targetTermId  = -1;
-		while(terms.size() > 0)
-		{
-			//what term are we working on
-			targetTerm = terms.poll();
-			//logger.debug("Current term is "+targetTerm + "length="+targetTerm.length());
-			//for each input lexicon
-			for(int i=0;i<numLexicons;i++)
-			{
-				//does this lexicon contain the term
-				//logger.debug("Checking lexicon "+i+" for "+targetTerm+"="+lis[i].getTerm());
-				if(hasMore[i] != -1 && lis[i].getTerm().equals(targetTerm))
-				{
-					if (targetTermId == -1)
-					{	//obtain the termid for this term from the first lexicon that has the term
-						targetTermId = lis[i].getTermId();
-					}
-					else if (targetTermId != lis[i].getTermId())
-					{	//check the termids match for this term
-						logger.error("Term "+targetTerm+" had two termids ("+targetTermId+","+lis[i].getTermId()+")");
-					}
-					//logger.debug("Term "+targetTerm + " found in "+i + "termid="+ lis[i].getTermId());
-					Tf += lis[i].getTF();
-					Nt += lis[i].getNt();
-					Bf += lis[i].getBlockFrequency();
-					hasMore[i] = lis[i].readNextEntry();
-					if (hasMore[i] != -1)
-					{
-						terms.add(lis[i].getTerm());
-						//break;
-					}
-					break;
-				}
-			}
-			if (terms.size()>0 && !terms.peek().equals(targetTerm))
-			{
-				if (targetTermId == -1)
-				{
-					logger.error("Term "+ targetTerm + " not found in any lexicons");
-				}
-				//end of this term, so we can write the lexicon entry
-				totalTokens += Tf;
-				totalPointers += Nt;
-				los.writeNextEntry(targetTerm, targetTermId, Nt, Tf, Bf, 0, (byte)0);
-				Bf = 0; Tf = Nt = 0; targetTermId = -1; targetTerm = null;
-			}
-		}
-		totalTokens += Tf;
-		totalPointers += Nt;
-		if (targetTermId != -1)
-			los.writeNextEntry(targetTerm, targetTermId, Nt, Tf, Bf, 0, (byte)0);
-		los.close();
-		for(int i=0;i<numLexicons;i++)
-			lis[i].close();
-	}
-
-
-	public static void main(String args[]) {
-		String path = args[0];
-		String prefix = args[1];
-		BlockLexiconBuilder blb = new BlockLexiconBuilder(path, prefix);
-		
-		String lexiconFilename = path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX;
-		LexiconInputStream lexStream = new LexiconInputStream(lexiconFilename);
-		blb.createLexiconHash(lexStream);
-		
-	}
-	
 }
Index: src/uk/ac/gla/terrier/structures/indexing/UTFBlockInvertedIndexBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/UTFBlockInvertedIndexBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/UTFBlockInvertedIndexBuilder.java	(working copy)
@@ -1,83 +0,0 @@
-/*
- * Terrier - Terabyte Retriever
- * Webpage: http://ir.dcs.gla.ac.uk/terrier
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is UTFBlockInvertedIndexBuilder.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> 
- *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
- */
-package uk.ac.gla.terrier.structures.indexing;
-import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.UTFBlockLexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconOutputStream;
-/**
- * Builds an inverted index using block information, where indexing lexicon is a UTFBlock lexicon. It is optional to 
- * save field information as well. 
- * @author Douglas Johnson &amp; Vassilis Plachouras &amp; Craig Macdonald
- * @version $Revision: 1.12 $
- */
-public class UTFBlockInvertedIndexBuilder extends BlockInvertedIndexBuilder {
-
-	public UTFBlockInvertedIndexBuilder(Index i)
-	{
-		super(i);
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		finalLexiconClass = "uk.ac.gla.terrier.structures.UTFLexicon";
-		finalLexiconInputStreamClass = "uk.ac.gla.terrier.structures.UTFLexiconInputStream";
-	}
-	
-	/**
-	 * Creates an instance of the BlockInvertedIndex class.
-	 * @deprecated
-	 */
-	public UTFBlockInvertedIndexBuilder() {
-		super();
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		finalLexiconClass = "uk.ac.gla.terrier.structures.UTFLexicon";
-		finalLexiconInputStreamClass = "uk.ac.gla.terrier.structures.UTFLexiconInputStream";
-	}
-	/**
-	 * Creates an instance of the BlockInvertedIndex class 
-	 * using the given filename.
-	 * @param filename the name of the inverted file
-	 * @deprecated
-	 */
-	public UTFBlockInvertedIndexBuilder(String filename) {
-		super(filename);
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		finalLexiconClass = "uk.ac.gla.terrier.structures.UTFLexicon";
-		finalLexiconInputStreamClass = "uk.ac.gla.terrier.structures.UTFLexiconInputStream";
-	}
-	
-	/**
-	@deprecated */
-	public UTFBlockInvertedIndexBuilder(String path, String prefix) {
-		super(path, prefix);
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-		finalLexiconClass = "uk.ac.gla.terrier.structures.UTFLexicon";
-		finalLexiconInputStreamClass = "uk.ac.gla.terrier.structures.UTFLexiconInputStream";
-	}
-}
Index: src/uk/ac/gla/terrier/structures/indexing/LexiconMap.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/LexiconMap.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/LexiconMap.java	(working copy)
@@ -32,6 +32,7 @@
 import java.io.IOException;
 import java.util.Arrays;
 
+import uk.ac.gla.terrier.structures.BasicLexiconEntry;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.TermCodes;
@@ -96,14 +97,16 @@
 	  * The binary tree is traversed in order, by called the method
 	  * traverseAndStoreToStream.
 	  * @param lexiconStream The lexicon output stream to store to. */
-	public void storeToStream(LexiconOutputStream lexiconStream) throws IOException {
-		final byte zerob = (byte)0;
-		final long zerol = (long)0;
+	public void storeToStream(LexiconOutputStream<String> lexiconStream) throws IOException
+	{
 		final String[] terms = tfs.keys(new String[0]);
 		Arrays.sort(terms);
+		BasicLexiconEntry le = new BasicLexiconEntry();//TODO: could use a lexicon entry type without positions
 		for (String t : terms)
 		{
-			lexiconStream.writeNextEntry(t, TermCodes.getCode(t), nts.get(t), tfs.get(t), zerol, zerob);
+			le.setTermId(TermCodes.getCode(t));
+			le.setStatistics(nts.get(t), tfs.get(t));
+			lexiconStream.writeNextEntry(t, le);
 		}
 	}
 	
Index: src/uk/ac/gla/terrier/structures/indexing/UTFInvertedIndexBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/UTFInvertedIndexBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/UTFInvertedIndexBuilder.java	(working copy)
@@ -1,77 +0,0 @@
-/*
- * Terrier - Terabyte Retriever
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is UTFInvertedIndexBuilder.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
- */
-package uk.ac.gla.terrier.structures.indexing;
-
-import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.UTFLexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconOutputStream;
-
-/**
- * Builds a UTF inverted index, using field information optionally.
- * @author Craig Macdonald &amp; Vassilis Plachouras
- * @version $Revision: 1.14 $
- */
-public class UTFInvertedIndexBuilder extends InvertedIndexBuilder {
-
-	public UTFInvertedIndexBuilder(Index i)
-	{
-		super(i);
-		lexiconInputStream = UTFLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-	}
-	
-	/**
-	 * A default constructor of the class InvertedIndex.
-	 * @deprecated
-	 */
-	public UTFInvertedIndexBuilder() {
-		super();
-		lexiconInputStream = UTFLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-	}
-
-	/** @deprecated */
-	public UTFInvertedIndexBuilder(String path, String prefix)
-	{
-		super(path, prefix);
-		lexiconInputStream = UTFLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-	}
-
-	/**
-	 * Creates an instance of the InvertedIndex
-	 * class using the given filename.
-	 * @param filename The name of the inverted file
-	 * @deprecated
-	 */
-	public UTFInvertedIndexBuilder(String filename) {
-		super(filename);
-		lexiconInputStream = UTFLexiconInputStream.class;
-		lexiconOutputStream = UTFLexiconOutputStream.class;
-	}
-	
-}
Index: src/uk/ac/gla/terrier/structures/indexing/BlockInvertedIndexBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/BlockInvertedIndexBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/BlockInvertedIndexBuilder.java	(working copy)
@@ -34,15 +34,18 @@
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
 
 import org.apache.log4j.Logger;
 
 import uk.ac.gla.terrier.structures.BlockDirectIndexInputStream;
-import uk.ac.gla.terrier.structures.BlockLexiconInputStream;
-import uk.ac.gla.terrier.structures.DocumentIndex;
+import uk.ac.gla.terrier.structures.BlockEntryStatistics;
+import uk.ac.gla.terrier.structures.Closeable;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexicon;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.FieldScore;
 import uk.ac.gla.terrier.utility.Files;
@@ -99,42 +102,9 @@
 
 	private static Logger logger = Logger.getRootLogger();
 	protected String finalLexiconClass = "uk.ac.gla.terrier.structures.Lexicon";
-	protected String finalLexiconInputStreamClass = "uk.ac.gla.terrier.structures.LexiconInputStream";
-
-	/**
-	 * Creates an instance of the BlockInvertedIndex class.
-	 * @deprecated
-	 */
-	public BlockInvertedIndexBuilder() {
-		this(ApplicationSetup.TERRIER_INDEX_PATH,
-				ApplicationSetup.TERRIER_INDEX_PREFIX);
-	}
 
-	/**
-	 * Creates an instance of the BlockInvertedIndex class using the given
-	 * filename.
-	 * 
-	 * @param filename
-	 *			the name of the inverted file
-	 * @deprecated use this() or this(String, String) instead
-	 */
-	public BlockInvertedIndexBuilder(String filename) {
-		super(filename);
-		lexiconInputStream = BlockLexiconInputStream.class;
-		lexiconOutputStream = LexiconOutputStream.class;
-	}
-	/**
-	 * @deprecated
-	 */
-	public BlockInvertedIndexBuilder(String path, String prefix) {
-		super(path, prefix);
-		lexiconInputStream = BlockLexiconInputStream.class;
-		lexiconOutputStream = LexiconOutputStream.class;
-	}
-
-	public BlockInvertedIndexBuilder(Index index) {
-		super(index);
-		lexiconInputStream = BlockLexiconInputStream.class;
+	public BlockInvertedIndexBuilder(Index index, String structureName) {
+		super(index, structureName);
 		lexiconOutputStream = LexiconOutputStream.class;
 	}
 
@@ -145,26 +115,23 @@
 	 * need to read the direct file is related to the parameter M, and
 	 * consequently to the size of the available memory.
 	 */
+	@SuppressWarnings("unchecked")
 	public void createInvertedIndex() {
 		numberOfPointersPerIteration = Integer.parseInt(ApplicationSetup.getProperty("invertedfile.processpointers", "2000000")); 
 		processTerms = Integer.parseInt(ApplicationSetup.getProperty("invertedfile.processterms", "25000"));
 		try {
 			Runtime r = Runtime.getRuntime();
 			logger.info("creating block inverted index");
-			final String LexiconFilename = indexPathPrefix
-					+ ApplicationSetup.LEXICONSUFFIX;
-			final String DocumentIndexFilename = indexPathPrefix
-					+ ApplicationSetup.DOC_INDEX_SUFFIX;
-			DocumentIndex docIndex = new DocumentIndex(DocumentIndexFilename);
-			final int numberOfDocuments = docIndex.getNumberOfDocuments();
-			docIndex.close();
+			final String LexiconFilename = index.getPath() + "/" + index.getPrefix() + ".lexicon";
+			final int numberOfDocuments = index.getCollectionStatistics().getNumberOfDocuments();
 
 			long assumedNumberOfPointers = Long.parseLong(index.getIndexProperty("num.Pointers", "0"));
 			long numberOfTokens = 0;
 			long numberOfPointers = 0;
 
-			BlockLexiconInputStream lexiconStream = (BlockLexiconInputStream) getLexInputStream(LexiconFilename);
-			numberOfUniqueTerms = lexiconStream.numberOfEntries();
+			int numberOfUniqueTerms = index.getLexicon().numberOfEntries();
+			Iterator<Map.Entry<String, LexiconEntry>> lexiconStream = (Iterator<Map.Entry<String, LexiconEntry>>)this.index.getIndexStructureInputStream("lexicon");
+
 			// A temporary file for storing the updated
 			// lexicon file, after creating the inverted file
 			DataOutputStream dos = new DataOutputStream(Files.writeFileStream(LexiconFilename.concat(".tmp2")));
@@ -299,49 +266,50 @@
 			this.numberOfTokens = numberOfTokens;
 			this.numberOfPointers = numberOfPointers;
 			file.close();
-			lexiconStream.close();
+			
+			if (lexiconStream instanceof Closeable) {
+				((Closeable)lexiconStream).close();
+			}
 			dos.close();
 			// finalising the lexicon file with the updated information
 			// on the frequencies and the offsets
-			BlockLexiconInputStream lis = (BlockLexiconInputStream) getLexInputStream(LexiconFilename);
+			// re-open the lexicon so that every entry can be re-written with
+			// the posting list position recorded in the .tmp2 file
 			// reading the original lexicon
-			LexiconOutputStream los = getLexOutputStream(LexiconFilename
-					.concat(".tmp3"));
+			lexiconStream = (Iterator<Map.Entry<String,LexiconEntry>>)index.getIndexStructureInputStream("lexicon");
+			
+			
 			// the updated lexicon
+			LexiconOutputStream<String> los = getLexOutputStream("tmplexicon");
+			
+			//the temporary data containing the offsets
 			DataInputStream dis = new DataInputStream(Files.openFileStream(LexiconFilename.concat(".tmp2")));
 
-			// the temporary data
-			while (lis.readNextEntryBytes() != -1) {
-				los.writeNextEntry(lis.getTermCharacters(), lis.getTermId(),
-						lis.getNt(),
-						// lis.getBlockFrequency(),
-						dis.readInt(),
-						// the term frequency
-						dis.readLong(), // the ending byte offset
-						dis.readByte());
+			while(lexiconStream.hasNext())
+			{
+				Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
+				LexiconEntry value = lee.getValue();
+				value.setPosition(dis.readLong(), dis.readByte());
+				los.writeNextEntry(lee.getKey(), value);
 			}
-			lis.close();
 			los.close();
 			dis.close();
-			if (! Files.delete(LexiconFilename)) 
-				logger.error("delete file .lex failed!");
-			if (! Files.delete(LexiconFilename.concat(".tmp2"))) 
-				logger.error("delete file .lex.tmp2 failed!");
-			if (! Files.rename(LexiconFilename.concat(".tmp3"), LexiconFilename))
-				logger.error("rename file .lex.tmp3 to .lex failed!");
+			if (lexiconStream instanceof Closeable) ((Closeable)lexiconStream).close(); // release the re-opened lexicon before its files are deleted and replaced
+			if (! Files.delete(LexiconFilename.concat(".tmp2"))) logger.error("delete file .lexicon.tmp2 failed!"); // report, rather than silently ignore, a failed clean-up
+			FSOMapFileLexicon.deleteMapFileLexicon("lexicon", index.getPath(), index.getPrefix());
+			FSOMapFileLexicon.renameMapFileLexicon("tmplexicon", index.getPath(), index.getPrefix(), "lexicon", index.getPath(), index.getPrefix());
+			//TODO : BlockInvertedIndexBuilder should change the Lexicon to use BasicLexiconEntry instead of BlockLexiconEntry
 
-			index.addIndexStructure("lexicon",finalLexiconClass);
-			index.addIndexStructureInputStream("lexicon",finalLexiconInputStreamClass);
 			index.addIndexStructure(
-					"inverted", 
+					structureName, 
 					"uk.ac.gla.terrier.structures.BlockInvertedIndex", 
-					"uk.ac.gla.terrier.structures.Lexicon,java.lang.String,java.lang.String", 
-					"lexicon,path,prefix");
+					"uk.ac.gla.terrier.structures.Index,java.lang.String", 
+					"index,structureName");
 			 index.addIndexStructureInputStream(
-					"inverted",
+					structureName,
 					"uk.ac.gla.terrier.structures.BlockInvertedIndexInputStream",
-					"java.lang.String,java.lang.String,uk.ac.gla.terrier.structures.LexiconInputStream",
-					"path,prefix,lexicon-inputstream");
+                    "uk.ac.gla.terrier.structures.Index,java.lang.String,java.util.Iterator",
+                    "index,structureName,lexicon-inputstream");
 			index.setIndexProperty("num.inverted.fields.bits", ""+FieldScore.FIELDS_COUNT );
 			//these should be already set, but in case their not
 			index.setIndexProperty("num.Terms", ""+numberOfUniqueTerms);
@@ -354,113 +322,19 @@
 		}
 	}
 
-	/**
-	 * Iterates through the lexicon, until it has reached the given number of
-	 * pointers
-	 * 
-	 * @param PointersToProcess
-	 *			Number of pointers to stop reading the lexicon after
-	 * @param blexiconStream
-	 *			the lexicon input stream to read
-	 * @param codesHashMap
-	 * @param tmpStorageStorage
-	 * @return
-	 */
-	protected IntLongTuple scanLexiconForPointers(final long PointersToProcess,
-			final LexiconInputStream blexiconStream,
-			final TIntIntHashMap codesHashMap, final ArrayList<TIntArrayList[]> tmpStorageStorage)
-			throws IOException {
-		final BlockLexiconInputStream lexiconStream = (BlockLexiconInputStream) blexiconStream;
-		int processTerms = 0;
-		long numberOfPointersThisIteration = 0;
-		long numberOfBlocksThisIteration = 0;
-		int j = 0; // counter of loop iterations
-		while (numberOfPointersThisIteration < PointersToProcess) {
-
-			if (lexiconStream.readNextEntry() == -1)
-				break;
-
-			processTerms++;
-
+	protected TIntArrayList[] createPointerForTerm(LexiconEntry le)
+	{
 			TIntArrayList[] tmpArray = new TIntArrayList[5];
-			final int tmpNT = lexiconStream.getNt();
+		final int tmpNT = le.getDocumentFrequency();
 			tmpArray[0] = new TIntArrayList(tmpNT);
 			tmpArray[1] = new TIntArrayList(tmpNT);
 			tmpArray[2] = new TIntArrayList(tmpNT);
 			tmpArray[3] = new TIntArrayList(tmpNT);
-			tmpArray[4] = new TIntArrayList(lexiconStream.getBlockFrequency());
-			numberOfPointersThisIteration += tmpNT;
-			numberOfBlocksThisIteration += lexiconStream.getBlockFrequency();
-
-			tmpStorageStorage.add(tmpArray);
-
-			// the class TIntIntHashMap return zero when you look up for a
-			// the value of a key that does not exist in the hash map.
-			// For this reason, the values that will be inserted in the
-			// hash map are increased by one.
-			codesHashMap.put(lexiconStream.getTermId(), j + 1);
-
-			// increment counter
-			j++;
-		}
-		if(logger.isDebugEnabled()){
-			logger.debug(numberOfPointersThisIteration + " pointers == "
-				+ processTerms + " terms == " + numberOfBlocksThisIteration
-				+ " blocks");
-		}
-		return new IntLongTuple(processTerms, numberOfPointersThisIteration);
+		tmpArray[4] = new TIntArrayList(((BlockEntryStatistics)le).getBlockCount());
+		return tmpArray;
 	}
 
-	/**
-	 * Iterates through the lexicon, until it has reached the given number of
-	 * terms
-	 * 
-	 * @param processTerms
-	 *			Number of terms to stop reading the lexicon after
-	 * @param blexiconStream
-	 *			the lexicon input stream to read
-	 * @param codesHashMap
-	 * @param tmpStorageStorage
-	 * @return
-	 */
-	protected IntLongTuple scanLexiconForTerms(final int processTerms,
-			final LexiconInputStream blexiconStream,
-			final TIntIntHashMap codesHashMap, TIntArrayList[][] tmpStorage)
-			throws IOException {
-		final BlockLexiconInputStream lexiconStream = (BlockLexiconInputStream) blexiconStream;
-		int j = 0;
-		long numberOfBlocksThisIteration = 0;
-		long numberOfPointersThisIteration = 0;
-		for (; j < processTerms; j++) {
 
-			if (lexiconStream.readNextEntry() == -1)
-				break;
-
-			TIntArrayList[] tmpArray = new TIntArrayList[5];
-			final int tmpNT = lexiconStream.getNt();
-			tmpArray[0] = new TIntArrayList(tmpNT);
-			tmpArray[1] = new TIntArrayList(tmpNT);
-			tmpArray[2] = new TIntArrayList(tmpNT);
-			tmpArray[3] = new TIntArrayList(tmpNT);
-			tmpArray[4] = new TIntArrayList(lexiconStream.getBlockFrequency());
-
-			numberOfPointersThisIteration += tmpNT;
-			numberOfBlocksThisIteration += lexiconStream.getBlockFrequency();
-
-			tmpStorage[j] = tmpArray;
-
-			// the class TIntIntHashMap return zero when you look up for a
-			// the value of a key that does not exist in the hash map.
-			// For this reason, the values that will be inserted in the
-			// hash map are increased by one.
-			codesHashMap.put(lexiconStream.getTermId(), j + 1);
-		}
-		if(logger.isDebugEnabled()){
-			logger.debug(numberOfPointersThisIteration + " pointers == " + j
-				+ " terms == " + numberOfBlocksThisIteration + " blocks");
-		}
-		return new IntLongTuple(j, numberOfPointersThisIteration);
-	}
 
 	/**
 	 * Traverses the direct fies recording all occurrences of terms noted in
@@ -481,10 +355,7 @@
 		// scan the direct file
 		//BlockDirectIndexInputStream directInputStream = new BlockDirectIndexInputStream(
 		//		indexPath, indexPrefix);
-		BlockDirectIndexInputStream directInputStream =
-			index != null
-				? (BlockDirectIndexInputStream)index.getIndexStructureInputStream("direct")
-				: new BlockDirectIndexInputStream(indexPath, indexPrefix);
+		BlockDirectIndexInputStream directInputStream = (BlockDirectIndexInputStream)index.getIndexStructureInputStream("direct");
 		int[][] documentTerms = null;
 		int p = 0; // a document counter;
 		while ((documentTerms = directInputStream.getNextTerms()) != null) {
@@ -581,6 +452,9 @@
 			tmpMatrix = null;
 			tmpStorage[j] = null;
 
+			dos.writeLong(file.getByteOffset());
+			dos.writeByte(file.getBitOffset());
+
 			// write the first entry
 			int docid = tmpMatrix0[0];
 			file.writeGamma(docid + 1);
@@ -615,17 +489,17 @@
 					blockindex++;
 				}
 			}
-			long endOffset = file.getByteOffset();
-			byte endBitOffset = file.getBitOffset();
-			endBitOffset--;
-			if (endBitOffset < 0 && endOffset > 0) {
-				endBitOffset = 7;
-				endOffset--;
-			}
+			//long endOffset = file.getByteOffset();
+			//byte endBitOffset = file.getBitOffset();
+			//endBitOffset--;
+			//if (endBitOffset < 0 && endOffset > 0) {
+			//	endBitOffset = 7;
+			//	endOffset--;
+			//}
 			numTokens += frequency;
-			dos.writeInt(frequency);
-			dos.writeLong(endOffset);
-			dos.writeByte(endBitOffset);
+			//dos.writeInt(frequency);
+			//dos.writeLong(endOffset);
+			//dos.writeByte(endBitOffset);
 
 			// dereference the arrays so they can be destroyed by GC
 			tmpMatrix0 = tmpMatrix1 = tmpMatrix2 = tmpMatrix3 = tmpMatrix4 = null;
Index: src/uk/ac/gla/terrier/structures/indexing/UTFBlockLexiconBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/UTFBlockLexiconBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/UTFBlockLexiconBuilder.java	(working copy)
@@ -1,135 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is UTFBlockLexiconBuilder.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Douglas Johnson <johnsoda{a.}dcs.gla.ac.uk> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
- *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk> 
- */
-package uk.ac.gla.terrier.structures.indexing;
-import java.io.IOException;
-
-import org.apache.log4j.Logger;
-
-import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFBlockLexiconInputStream;
-import uk.ac.gla.terrier.structures.UTFBlockLexiconOutputStream;
-import uk.ac.gla.terrier.structures.UTFLexicon;
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-/**
- * Builds a block lexicon using block frequencies.
- * @author Douglas Johnsonm, Vassilis Plachouras &amp; Craig Macdonald
- * @version $Revision: 1.17 $
- */
-public class UTFBlockLexiconBuilder extends BlockLexiconBuilder
-{
-	protected static Logger logger = Logger.getRootLogger();
-	/**
-	 * A default constructor of the class. The lexicon is built in the 
-	 * default path and file: ApplicationSetup.TERRIER_INDEX_PATH and 
-	 * ApplicationSetup.TERRIER_INDEX_PREFIX respectively.
-	 */	
-	public UTFBlockLexiconBuilder() {
-		super();
-		lexiconOutputStream = UTFBlockLexiconOutputStream.class;
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		LexiconMapClass = BlockLexiconMap.class;
-		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-	}
-	
-	public UTFBlockLexiconBuilder(Index i)
-	{
-		super(i);
-		lexiconOutputStream = UTFBlockLexiconOutputStream.class;
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		LexiconMapClass = BlockLexiconMap.class;
-		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-	}
-	
-	/** 
-	 * A default constructor which is given a pathname in which
-	 * the temporary lexicons will be stored.
-	 * @param pathname String the name of the path in which the temporary
-	 * and final lexicons will be stored.
-	 * @param prefix String the file component of the lexicons
-	 */	
-	public UTFBlockLexiconBuilder(String pathname, String prefix) {
-		super(pathname, prefix);
-		lexiconOutputStream = UTFBlockLexiconOutputStream.class;
-		lexiconInputStream = UTFBlockLexiconInputStream.class;
-		LexiconMapClass = BlockLexiconMap.class;
-		try{ TempLex = (LexiconMap) LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);}
-	}	
-
-	/**
-	 * The method that performs processing of the lexicon after the
-	 * creation of the direct index has been completed. It flushes to 
-	 * disk the current temporary lexicon, and it starts the merging
-	 * of the temporary lexicons and the creation of the lexicon index. 
-	 */
-	public void finishedDirectIndexBuild()
-	{
-		if(logger.isInfoEnabled()){
-			logger.info("flushing utf block lexicon to disk after the direct index completed");
-		}
-		//only write a temporary lexicon if there are any items in it
-		if (TempLex.getNumberOfNodes() > 0)
-			writeTemporaryLexicon();
-
-		//merges the temporary lexicons
-		if (tempLexFiles.size() > 0)
-		{
-			try{
-				merge(tempLexFiles);
-	
-				//creates the offsets file
-				final String lexiconFilename = 
-					indexPath + ApplicationSetup.FILE_SEPARATOR + 
-					indexPrefix + ApplicationSetup.LEXICONSUFFIX;
-				LexiconInputStream lis = getLexInputStream(lexiconFilename);
-				createLexiconIndex(
-					lis,
-					lis.numberOfEntries(),
-					/* after inverted index is built, the lexicon will be transformed into a
-					 * normal lexicon, without block frequency */
-					UTFLexicon.lexiconEntryLength
-					); 
-				TermCount = lis.numberOfEntries();
-				if (index != null)
-				{
-					index.addIndexStructure("lexicon", "uk.ac.gla.terrier.structures.UTFBlockLexicon");
-					index.addIndexStructureInputStream("lexicon", "uk.ac.gla.terrier.structures.UTFBlockLexiconInputStream");
-					index.setIndexProperty("num.Terms", ""+lis.numberOfEntries());
-					index.setIndexProperty("num.Pointers", ""+lis.getNumberOfPointersRead());
-				}
-			} catch(IOException ioe){
-				logger.error("Indexing failed to write a lexicon index file to disk", ioe);
-			}	
-		}
-		else
-			logger.warn("No temporary lexicons to merge, skipping");
-		
-	}
-
-	
-}
Index: src/uk/ac/gla/terrier/structures/indexing/BlockDirectIndexBuilder.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/BlockDirectIndexBuilder.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/BlockDirectIndexBuilder.java	(working copy)
@@ -87,13 +87,6 @@
 		/* find out where we are */
 		FilePosition rtr = getLastEndOffset();
 		
-		/* flush to disk if necessary */
-		if (DocumentsSinceFlush++ >= DocumentsPerFlush)
-		{
-			flushBuffer();
-			resetBuffer();
-			DocumentsSinceFlush = 0;
-		}
 		/* and then return where the position of the last 
 		 * write to the DirectIndex */
 		return rtr;
@@ -106,8 +99,6 @@
 	 */
 	public void finishedCollections()
 	{
-		flushBuffer();
-		resetBuffer();
 		DocumentsSinceFlush = 0;
 		logger.info("flush direct index");
 		try{
Index: src/uk/ac/gla/terrier/structures/indexing/singlepass/hadoop/HadoopRunsMerger.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/singlepass/hadoop/HadoopRunsMerger.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/singlepass/hadoop/HadoopRunsMerger.java	(working copy)
@@ -30,6 +30,7 @@
 import java.io.IOException;
 import java.util.LinkedList;
 import java.util.ListIterator;
+import uk.ac.gla.terrier.structures.BasicLexiconEntry;
 import org.apache.hadoop.mapred.TaskID;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
 import uk.ac.gla.terrier.structures.indexing.singlepass.PostingInRun;
@@ -62,9 +63,9 @@
 		mapData = _mapData;
 	}
 
-	public void endMerge(LexiconOutputStream lexStream) {}
+	public void endMerge(LexiconOutputStream<String> lexStream) {}
 	
-	public void mergeOne(LexiconOutputStream lexStream) throws Exception
+	public void mergeOne(LexiconOutputStream<String> lexStream) throws Exception
 	{	
 		int maxDF = 0;
 		RunIterator run = runsSource.createRunIterator(-1);
@@ -74,6 +75,8 @@
 		lastTermWritten = null;
 		lastFreq = 0;
 		lastDocFreq= 0;
+		long startOffset = this.getByteOffset();
+		byte startBitOffset = this.getBitOffset();
 		// for each run in the list 
 		int counter = 0;
 		//for one term: for each set of postings for that term
@@ -83,7 +86,6 @@
 			PostingInRun posting = run.next();
 			lastTermWritten = posting.getTerm();
 			final int reduceNumber = (TaskID.forName(_run.getMapNo()).getId()/partitionSize);
-			
 			//
 			if (posting.getDf() > maxDF) 
 				maxDF = posting.getDf();
@@ -129,7 +131,7 @@
 			lastDocFreq += posting.getDf();
 			counter++;
 		}
-		lexStream.writeNextEntry(lastTermWritten, currentTerm++, lastDocFreq, lastFreq, this.getByteOffset(), (byte)this.getBitOffset());
+		lexStream.writeNextEntry(lastTermWritten, new BasicLexiconEntry(currentTerm++, lastDocFreq, lastFreq, startOffset, startBitOffset));
 		numberOfPointers += lastDocFreq;
 	}
 	
Index: src/uk/ac/gla/terrier/structures/indexing/singlepass/hadoop/MultiFileCollectionInputFormat.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/singlepass/hadoop/MultiFileCollectionInputFormat.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/singlepass/hadoop/MultiFileCollectionInputFormat.java	(working copy)
@@ -48,7 +48,9 @@
 /**
  * Input Format Class for Hadoop Indexing. Splits the input collection into
  * sets of files where each Map task gets about the same number of files.
- * Files are assumed to be un-splittable and are not split.
+ * Files are assumed to be un-splittable and are not split. Splits are of
+ * adjacent files - i.e. split 0 always has the first file, and the last
+ * split always has the last file. Any given split will have adjacent files.
  * @author Richard McCreadie and Craig Macdonald
  * @since 2.2
  * @version $Revision: 1.2 $
Index: src/uk/ac/gla/terrier/structures/indexing/singlepass/RunsMerger.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/singlepass/RunsMerger.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/singlepass/RunsMerger.java	(working copy)
@@ -35,6 +35,9 @@
 
 import uk.ac.gla.terrier.compression.BitOut;
 import uk.ac.gla.terrier.compression.BitOutputStream;
+import uk.ac.gla.terrier.structures.BasicLexiconEntry;
+import uk.ac.gla.terrier.structures.BitFilePosition;
+import uk.ac.gla.terrier.structures.FilePosition;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
 
 /**
@@ -87,6 +90,8 @@
 	/** Number of pointers written */
 	protected int numberOfPointers = 0;
 
+	protected BitFilePosition startOffset = new FilePosition(0l,(byte)0);
+
 	
 	protected RunIteratorFactory runsSource;
 	
@@ -134,14 +139,16 @@
 	 * @return the byte offset in the BitOut (used for lexicon writting)
 	 */
 	public long getByteOffset(){
-		return bos.getBitOffset() == 0? bos.getByteOffset() - 1: bos.getByteOffset(); 
+		return bos.getByteOffset();
+		//return bos.getBitOffset() == 0? bos.getByteOffset() - 1: bos.getByteOffset(); 
 	}
 	
 	/**
 	 * @return the bit offset in the BitOut (used for lexicon writting)
 	 */
-	public int getBitOffset(){
-		return bos.getBitOffset() == 0? 7: bos.getBitOffset() - 1;
+	public byte getBitOffset(){
+		return bos.getBitOffset();
+		//return bos.getBitOffset() == 0 ? (byte)7 : bos.getBitOffset() - (byte)1;
 	}
 	
 	/**
@@ -207,21 +214,26 @@
 	 * @param lexStream LexiconOutputStream used to write the lexicon.
 	 * @throws IOException if an I/O error occurs.
 	 */
-	public void mergeOne(LexiconOutputStream lexStream) throws Exception{		
+	public void mergeOne(LexiconOutputStream<String> lexStream) throws Exception{		
 		myRun = queue.poll();
 		if(myRun.current().getTerm().equals(lastTermWritten)){
 			// append the term --> keep the data in memory
 			lastDocument = myRun.current().append(bos, lastDocument);
 			lastFreq += myRun.current().getTF();
 			lastDocFreq += myRun.current().getDf();
+			
 		}else{			
-			lexStream.writeNextEntry(lastTermWritten, currentTerm++, lastDocFreq, lastFreq, this.getByteOffset(), (byte)this.getBitOffset());
-			// write the new term
+			//write this term to the lexicon
+			lexStream.writeNextEntry(lastTermWritten, new BasicLexiconEntry(currentTerm++, lastDocFreq, lastFreq, startOffset));
+			//record the start offset of the next term
+			startOffset.setPosition(this.getByteOffset(), this.getBitOffset());
+			//get the information of the next term from the Run
 			numberOfPointers += lastDocFreq;
 			lastDocument = myRun.current().append(bos,-1);
 			lastFreq = myRun.current().getTF();
 			lastDocFreq = myRun.current().getDf();
 			lastTermWritten = myRun.current().getTerm();
+			
 		}
 		if(myRun.hasNext()){
 			myRun.next();
@@ -236,8 +248,9 @@
 	 * @param lexStream LexiconOutputStream used to write the lexicon.
 	 * @throws IOException if an I/O error occurs.	
 	 */	
-	public void endMerge(LexiconOutputStream lexStream) throws IOException{
-		lexStream.writeNextEntry(lastTermWritten, currentTerm++, lastDocFreq, lastFreq, this.getByteOffset(), (byte)this.getBitOffset());		
+	public void endMerge(LexiconOutputStream<String> lexStream) throws IOException{
+		lexStream.writeNextEntry(lastTermWritten, new BasicLexiconEntry(currentTerm++, lastDocFreq, lastFreq, startOffset));
+		//startOffset.setPosition(this.getByteOffset(), this.getBitOffset());
 		numberOfPointers += lastDocFreq;
 		bos.close();
 		myRun.close();
Index: src/uk/ac/gla/terrier/structures/indexing/TermEstimateIndex.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/TermEstimateIndex.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/TermEstimateIndex.java	(working copy)
@@ -25,15 +25,17 @@
  */
 package uk.ac.gla.terrier.structures.indexing;
 import java.io.DataInputStream;
-import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
 
 import org.apache.log4j.Logger;
 
+import uk.ac.gla.terrier.structures.Closeable;
 import uk.ac.gla.terrier.structures.CollectionStatistics;
 import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.Lexicon;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.Files;
 /**
@@ -42,20 +44,16 @@
  */
 public class TermEstimateIndex {
 	private static Logger logger = Logger.getRootLogger();
-	protected final Lexicon lex;
+	protected final Lexicon<String> lex;
 	protected final int numTerms;
 
 	/** The array of term estimate for each term. It is sorted by termid. */
 	protected double[] termEstimate;
 	/** The filename of the term estimate index on disk. */
 	protected String INDEX_FILENAME; 
-	/**
-	 * The default constructor.
-	 */
-	public TermEstimateIndex() {
-		this( Index.createIndex() );
-	}
-	public TermEstimateIndex(Index index)
+
+	@SuppressWarnings("unchecked")
+	public TermEstimateIndex(Index index) throws IOException
 	{
 		final String path  = index.getPath();
 		final String prefix = index.getPrefix();
@@ -71,35 +69,30 @@
 
 		//always use a lexiconinputstream, as blocklexicons dont exist past invertedindex creation
 		//but check if we're using UTF
-		final LexiconInputStream lexin = (LexiconInputStream)index.getIndexStructureInputStream("lexicon");
+		final Iterator<Map.Entry<String,LexiconEntry>> lexin = 
+			(Iterator<Map.Entry<String,LexiconEntry>>)index.getIndexStructureInputStream("lexicon");
 
-
-		for (int i = 0; i < termids.length; i++){
-			try{
-				lexin.readNextEntry();
-				termids[i] = lexin.getTermId();
+		int i=0;
+		while(lexin.hasNext())
+		{
+			termids[i++] = lexin.next().getValue().getTermId();
 			}
-			catch(IOException ioe){
-				logger.error("Problem reading lexicon input stream while loading TermEstimateIndex");
+		if (lexin instanceof Closeable)
+			((Closeable)lexin).close();
 				
+		if (! Files.exists(INDEX_FILENAME)){ // bail out only when the estimates file is missing; it is opened and read below
+			logger.error("Could not load TermEstimate index");
+			return;
 			}
-		}
-		lexin.close();
 		
-		if (Files.exists(INDEX_FILENAME)){
-			try{
 				DataInputStream in = new DataInputStream(
 					Files.openFileStream(INDEX_FILENAME));
-				for (int i = 0; i < collectionStatistics.getNumberOfUniqueTerms(); i++){
+		final int termCount = collectionStatistics.getNumberOfUniqueTerms();
+		for (i = 0; i < termCount; i++){
 					this.termEstimate[termids[i]] = in.readDouble();
 				}
 				in.close();
 			}
-			catch(IOException ioe){
-				logger.error("Problem reading TermEstimateIndex at "+INDEX_FILENAME, ioe);
-			}
-		}
-	}
 	/**
 	 * This method prints all the entries in the term estimate index.
 	 *
@@ -110,9 +103,9 @@
 					Files.openFileStream(INDEX_FILENAME));
 			for (int i = 0; i < numTerms; i++){
 				double te = in.readDouble();
-				lex.seekEntry(i);
+				Map.Entry<String,LexiconEntry> lee = lex.getIthLexiconEntry(i);
 				if(logger.isDebugEnabled()){
-					logger.debug(lex.getTerm() + ": " + te);
+					logger.debug(lee.getKey() + ": " + te);
 				}
 			}
 			in.close();
Index: src/uk/ac/gla/terrier/structures/indexing/BlockLexiconMap.java
===================================================================
--- src/uk/ac/gla/terrier/structures/indexing/BlockLexiconMap.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/indexing/BlockLexiconMap.java	(working copy)
@@ -27,17 +27,23 @@
  */
 package uk.ac.gla.terrier.structures.indexing;
 
-import uk.ac.gla.terrier.structures.LexiconOutputStream;
-import uk.ac.gla.terrier.structures.BlockLexiconOutputStream;
-import gnu.trove.*;
-import java.util.Arrays;
+import gnu.trove.TObjectIntHashMap;
+import gnu.trove.TObjectIntProcedure;
+
 import java.io.IOException;
+import java.util.Arrays;
+
+import uk.ac.gla.terrier.structures.BlockLexiconEntry;
+import uk.ac.gla.terrier.structures.LexiconOutputStream;
 import uk.ac.gla.terrier.utility.TermCodes;
 
 /** LexiconMap implementation that also keeps track of the number of blocks that a term occurrs in.
   * This is useful for sizing the block inverted index */
 public class BlockLexiconMap extends LexiconMap
 {
+	protected static final byte zerob = (byte)0;
+	protected static final long zerol = (long)0;
+	
 	/** Total number of blocks in this index */
 	protected long numberOfBlocks = 0;
 	/** Mapping term to blocks */
@@ -85,16 +91,14 @@
 	}
 
 	/** Stores the lexicon map to a lexicon stream as a sequence of entries.
-	  * @param _lexiconStream The lexicon output stream to store to. */
-	public void storeToStream(final LexiconOutputStream _lexiconStream) throws IOException {
-		final BlockLexiconOutputStream lexiconStream = (BlockLexiconOutputStream)_lexiconStream;
-		final byte zerob = (byte)0;
-		final long zerol = (long)0;
+	  * @param lexiconStream The lexicon output stream to store to. */
+	public void storeToStream(final LexiconOutputStream<String> lexiconStream) throws IOException {
+		
 		final String[] terms = tfs.keys(new String[0]);
 		Arrays.sort(terms);
 		for (String t : terms)
 		{
-			lexiconStream.writeNextEntry(t, TermCodes.getCode(t), nts.get(t), tfs.get(t), blockFreqs.get(t), zerol, zerob);
+			lexiconStream.writeNextEntry(t, new BlockLexiconEntry(TermCodes.getCode(t), nts.get(t), tfs.get(t), zerol, zerob, blockFreqs.get(t)));
 		}
 	}
 
Index: src/uk/ac/gla/terrier/structures/BlockInvertedIndex.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockInvertedIndex.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/BlockInvertedIndex.java	(working copy)
@@ -34,34 +34,24 @@
 /**
  * This class implements the block field inverted 
  * index for performing retrieval.
- * @author Douglas Johnson
+ * @author Douglas Johnson, Craig Macdonald et al.
  * @version $Revision: 1.32 $
  */
 public class BlockInvertedIndex extends InvertedIndex implements IndexConfigurable {
 	protected int DocumentBlockCountDelta = 1;
 	protected BlockInvertedIndex() {}
 
-	/**
-	 * Creates an instance of the BlockInvertedIndex class 
-	 * using the given lexicon.
-	 * @param lexicon The lexicon used for retrieval
-	 */
-	public BlockInvertedIndex(Lexicon lexicon) {
-		super(lexicon);
-	}
-
-	public BlockInvertedIndex(Lexicon lexicon, String path, String prefix) {
-		super(lexicon, path, prefix);
+	public BlockInvertedIndex(Index index, String structureName) {
+		super(index, structureName);
 	}
 	
 	/**
 	 * Creates an instance of the BlockInvertedIndex class 
 	 * using the given lexicon.
-	 * @param lexicon The lexicon used for retrieval
 	 * @param filename the name of the inverted file
 	 */
-	public BlockInvertedIndex(Lexicon lexicon, String filename) {
-		super(lexicon, filename);
+	public BlockInvertedIndex(String filename) {
+		super(filename);
 	}
 
 	/** let it know which index to use */
@@ -70,38 +60,7 @@
 		DocumentBlockCountDelta = i.getIntIndexProperty("blocks.invertedindex.countdelta", 1);
 	}
 
-	/**
-	 * Prints out the block inverted index file.
-	 */
-	public void print() {
-		for (int i = 0; i < lexicon.getNumberOfLexiconEntries(); i++) {
-			lexicon.findTerm(i);
-			System.out.print("Term ("+lexicon.getTerm()+","+i+") : ");
-			int[][] documents = getDocuments(i);
-			int blockindex = 0;
-			for (int j = 0; j < documents[0].length; j++) {
-				System.out.print(
-					"("
-						+ documents[0][j]
-						+ ", "
-						+ documents[1][j]
-						+ ", ");
-				if (FieldScore.USE_FIELD_INFORMATION)
-				{
-					System.out.print(documents[2][j]
-					+ ", ");
-				}
-				System.out.print( documents[3][j]);
 				
-				for (int k = 0; k < documents[3][j]; k++) {
-					System.out.print(", B" + documents[4][blockindex]);
-					blockindex++;
-				}
-				System.out.print(")");
-			}
-			System.out.println();
-		}
-	}
 	/**
 	 * Returns a 2D array containing the document ids, 
 	 * the term frequencies, the field scores the block frequencies and 
@@ -111,14 +70,15 @@
 	 *				 frequencies, while the last vector contains the 
 	 *				 block identifiers and it has a different length from 
 	 *				 the document identifiers.
-	 * @param startOffset start byte of the postings in the inverted file
-	 * @param startBitOffset start bit of the postings in the inverted file
-	 * @param endOffset end byte of the postings in the inverted file
-	 * @param endBitOffset end bit of the postings in the inverted file
-	 * @param df the number of postings to expect 
+	 * @param pointer start byte and bit offset of the postings in the inverted file,
+	 * together with number of postings to expect
 	 */
 
-	public int[][] getDocuments(final long startOffset, final byte startBitOffset, final long endOffset, final byte endBitOffset, final int df) {
+	public int[][] getDocuments(BitIndexPointer pointer) {
+		
+		final long startOffset = pointer.getBytes();
+		final byte startBitOffset = pointer.getBits();
+		final int df = pointer.getNumberOfEntries();
 		
 		final int fieldCount = FieldScore.FIELDS_COUNT;
 		final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
@@ -131,8 +91,7 @@
 		final TIntArrayList blockids = new TIntArrayList(df); //ideally we'd have TF here
 
 		try{
-		
-			final BitIn file = this.file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);
+			final BitIn file = this.file.readReset(startOffset, startBitOffset);
 	
 			if (loadTagInformation) { //if there are tag information to process
 				//documentTerms[2] = new int[df]; 
@@ -196,247 +155,4 @@
 			return null;
 		}
 	}
-
-
-	//* @param termid the id of the term whose documents we are looking for.
-	//public int[][] getDocuments(int termid) {
-	/*public int[][] getDocuments(final long startOffset, final byte startBitOffset, final long endOffset, final byte endBitOffset, int df) {
-
-		//boolean found = lexicon.findTerm(termid);
-		final byte startBitOffset = lexicon.getStartBitOffset();
-		final long startOffset = lexicon.getStartOffset();
-		final byte endBitOffset = lexicon.getEndBitOffset();
-		final long endOffset = lexicon.getEndOffset();
-
-		final int FIELDS_COUNT = FieldScore.FIELDS_COUNT;
-
-		// TODO use heuristics here like we do in InvertedIndex.java
-		 // for setting a good guess of the arraylist sizes. 
-		TIntArrayList temporaryDocids = new TIntArrayList();
-		TIntArrayList temporaryTFs = new TIntArrayList();
-		TIntArrayList temporaryFields = new TIntArrayList();
-		TIntArrayList temporaryBlockFreq = new TIntArrayList();
-		TIntArrayList temporaryBlockIds = new TIntArrayList();
-		int previousDocid = -1;
-			
-		//ArrayList temporaryTerms = new ArrayList();
-		//ArrayList temporaryBlockids = new ArrayList();
-		//int blockcount = 0;
-		try{
-			final BitIn file = this.file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);
-			//boolean hasMore = false;
-			while (((file.getByteOffset() + startOffset) < endOffset)
-				|| (((file.getByteOffset() + startOffset) == endOffset)
-					&& (file.getBitOffset() < endBitOffset))) {
-	
-				temporaryDocids.add(previousDocid = file.readGamma() + previousDocid);
-				temporaryTFs.add(file.readUnary());
-				temporaryFields.add(file.readBinary(FIELDS_COUNT));
-				
-				/*int docId = file.readGamma();
-				/int[] tmp = new int[4];
-				tmp[0] = docId;
-				tmp[1] = file.readUnary();
-				tmp[2] = file.readBinary(FIELDS_COUNT);
-				
-				final int blockfreq = file.readUnary();
-				temporaryBlockFreq.add(blockfreq);
-				//tmp[3] = blockfreq;
-				//System.out.print("docid="+previousDocid + "blockfreq="+blockfreq);
-	
-				int[] tmp2 = new int[blockfreq];
-				int previousBlockId = -1;
-				//System.out.print(" blocks=");
-				for (int i = 0; i < blockfreq; i++) {
-					tmp2[i] = previousBlockId = file.readGamma() + previousBlockId;
-					 //System.out.print(previousBlockId + ",");
-					//blockcount++;
-				}
-				// System.out.println("");
-				//temporaryTerms.add(tmp);
-				//temporaryBlockids.add(tmp2);
-				temporaryBlockIds.add(tmp2);
-			}
-			int[][] documentTerms = new int[5][];
-			documentTerms[0] = temporaryDocids.toNativeArray(); //new int[temporaryTerms.size()];
-			documentTerms[1] = temporaryTFs.toNativeArray(); //new int[temporaryTerms.size()];
-			documentTerms[2] = temporaryFields.toNativeArray(); //new int[temporaryTerms.size()];
-			documentTerms[3] = temporaryBlockFreq.toNativeArray(); //new int[temporaryTerms.size()];
-			documentTerms[4] =	temporaryBlockIds.toNativeArray(); //new int[blockcount];
-			/*
-			documentTerms[0][0] = ((int[]) temporaryTerms.get(0))[0] - 1;
-			documentTerms[1][0] = ((int[]) temporaryTerms.get(0))[1];
-			documentTerms[2][0] = ((int[]) temporaryTerms.get(0))[2];
-			documentTerms[3][0] = ((int[]) temporaryTerms.get(0))[3];
-			int[] blockids = ((int[]) temporaryBlockids.get(0));
-			documentTerms[4][0] = blockids[0] - 1;
-			for (int i = 1; i < blockids.length; i++) {
-				documentTerms[4][i] = blockids[i] + documentTerms[4][i - 1];
-			}
-			int blockindex = blockids.length;
-			if (documentTerms[0].length > 1) {
-				for (int i = 1; i < documentTerms[0].length; i++) {
-					int[] tmpMatrix = (int[]) temporaryTerms.get(i);
-					documentTerms[0][i] = tmpMatrix[0] + documentTerms[0][i - 1];
-					documentTerms[1][i] = tmpMatrix[1];
-					documentTerms[2][i] = tmpMatrix[2];
-					documentTerms[3][i] = tmpMatrix[3];
-					blockids = ((int[]) temporaryBlockids.get(i));
-					documentTerms[4][blockindex] = blockids[0] - 1;
-					blockindex++;
-					for (int j = 1; j < blockids.length; j++) {
-						documentTerms[4][blockindex] =
-							blockids[j] + documentTerms[4][blockindex - 1];
-						blockindex++;
-					}
-				}
-			}
-			return documentTerms;
-		}catch (IOException ioe) {
-			logger.error("Problem reading direct index", ioe);
-			return null;
-		}
-	}*/
-
-
-	/*public int[][] getDocumentsWithoutBlocks(int termid, int startDocid, int endDocid) {
-		if (! lexicon.findTerm(termid))
-			return null;
-	
-		byte startBitOffset = lexicon.getStartBitOffset();
-		long startOffset = lexicon.getStartOffset();
-		byte endBitOffset = lexicon.getEndBitOffset();
-		long endOffset = lexicon.getEndOffset();
-		// TODO use heuristics here like we do in InvertedIndex.java
-		// for setting a good guess of the arraylist sizes. 
-		ArrayList<int[]> temporaryTerms = new ArrayList<int[]>();
-		//int blockcount = 0;
-		try{
-			final BitIn file = this.file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);
-			//boolean hasMore = false;
-			final int fieldCount = FieldScore.FIELDS_COUNT;
-			while (((file.getByteOffset() + startOffset) < endOffset)
-					|| (((file.getByteOffset() + startOffset) == endOffset)
-					&& (file.getBitOffset() < endBitOffset))) {
-				int docId = file.readGamma();
-				int[] tmp = new int[3];
-				tmp[0] = docId;
-				tmp[1] = file.readUnary();
-				tmp[2] = file.readBinary(fieldCount);
-			 
-				//read the blocks, but dont save them
-				int blockfreq = file.readUnary();
-				for (int i = 0; i < blockfreq; i++) {
-					file.readGamma();
-				 }
-				if (docId >= startDocid && docId <=endDocid){
-					temporaryTerms.add(tmp);		
-				}
-			}
-			int[][] documentTerms = new int[3][];
-			if (temporaryTerms.size()>0){
-				documentTerms[0] = new int[temporaryTerms.size()];
-				documentTerms[1] = new int[temporaryTerms.size()];
-				documentTerms[2] = new int[temporaryTerms.size()];
-	 
-				documentTerms[0][0] = ((int[]) temporaryTerms.get(0))[0] - 1;
-				documentTerms[1][0] = ((int[]) temporaryTerms.get(0))[1];
-				documentTerms[2][0] = ((int[]) temporaryTerms.get(0))[2];
-		 
-				if (documentTerms[0].length > 1) {
-					for (int i = 1; i < documentTerms[0].length; i++) {
-						int[] tmpMatrix = (int[]) temporaryTerms.get(i);
-						documentTerms[0][i] = tmpMatrix[0] + documentTerms[0][i - 1];
-						documentTerms[1][i] = tmpMatrix[1];
-						documentTerms[2][i] = tmpMatrix[2];
-			 		}
-				}
-			}
-			return documentTerms;
-		} catch (IOException ioe) {
-			logger.error("Problem reading direct index", ioe);
-			return null;
-		}
-	}
-	*/
-	public int[][] getDocuments(int termid) {
-		 LexiconEntry lEntry = lexicon.getLexiconEntry(termid);
-		if (lEntry == null)
-			return null;
-		return getDocuments(lEntry.startOffset,
-			lEntry.startBitOffset,
-			lEntry.endOffset,
-			lEntry.endBitOffset, lEntry.n_t);
-	}
-	public int[][] getDocumentsWithoutBlocks(int termid) {
-		LexiconEntry lEntry = lexicon.getLexiconEntry(termid);
-		if (lEntry == null)
-			return null;
-		return getDocumentsWithoutBlocks(lEntry.startOffset,
-			lEntry.startBitOffset,
-			lEntry.endOffset,
-			lEntry.endBitOffset, lEntry.n_t);
-	}
-
-	public int[][] getDocumentsWithoutBlocks(LexiconEntry lEntry)
-	{
-		return getDocumentsWithoutBlocks(
-			lEntry.startOffset,
-			lEntry.startBitOffset,
-			lEntry.endOffset,
-			lEntry.endBitOffset, lEntry.n_t);
-	}
-
-	public int[][] getDocumentsWithoutBlocks(long startOffset,  byte startBitOffset, long endOffset, byte endBitOffset, int df)
-	{	
-		int[][] documentTerms = null;
-		try{
-			final BitIn file = this.file.readReset(startOffset, startBitOffset, endOffset, endBitOffset);
-			final int fieldCount = FieldScore.FIELDS_COUNT;
-			 final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
- 			if (loadTagInformation) { //if there are tag information to process		 
-				documentTerms = new int[3][df];
-				documentTerms[0][0] = file.readGamma() - 1;
-				documentTerms[1][0] = file.readUnary();
-				documentTerms[2][0] = file.readBinary(fieldCount);
-				//read the blocks, but dont save them
-				int blockfreq = file.readUnary() - DocumentBlockCountDelta;
-				for (int j = 0; j < blockfreq; j++) {
-					file.readGamma();
-				 }
-				for (int i = 1; i < df; i++) {
-					documentTerms[0][i]  = file.readGamma() + documentTerms[0][i - 1];
-					documentTerms[1][i]  = file.readUnary();
-					documentTerms[2][i]  = file.readBinary(fieldCount);
-					//read the blocks, but dont save them
-			   		blockfreq = file.readUnary() - DocumentBlockCountDelta;
-					for (int j = 0; j < blockfreq; j++) {
-						file.readGamma();
-				 	}
-				}
-			} else { //no tag information to process					
-				documentTerms = new int[2][df];
-				documentTerms[0][0] = file.readGamma() - 1;
-				documentTerms[1][0] = file.readUnary();
-				//read the blocks, but dont save them
-				int blockfreq = file.readUnary() - DocumentBlockCountDelta;
-				for (int j = 0; j < blockfreq; j++) {
-					file.readGamma();
-				 }
-				for(int i = 1; i < df; i++){
-					documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
-					documentTerms[1][i] = file.readUnary();
-					//read the blocks, but dont save them
-					blockfreq = file.readUnary() - DocumentBlockCountDelta;
-					for (int j = 0; j < blockfreq; j++) {
-						file.readGamma();
-				 	}
-				}
-			}
-			return documentTerms;
-		} catch (IOException ioe) {
-			logger.error("Problem reading inverted index", ioe);
-			return null;
-		}
-	}
 }
Index: src/uk/ac/gla/terrier/structures/seralization/FixedSizeTextFactory.java
===================================================================
--- src/uk/ac/gla/terrier/structures/seralization/FixedSizeTextFactory.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/seralization/FixedSizeTextFactory.java	(revision 0)
@@ -0,0 +1,125 @@
+package uk.ac.gla.terrier.structures.seralization;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableUtils;
+
+public class FixedSizeTextFactory implements FixedSizeWriteableFactory<Text> {
+	class FixedSizeText extends Text {
+		public FixedSizeText() {
+			super();
+		}
+
+		public FixedSizeText(byte[] b) {
+			super(b);
+		}
+
+		public FixedSizeText(String s) {
+			super(s);
+		}
+
+		public FixedSizeText(Text t) {
+			super(t);
+		}
+
+		@Override
+		public void readFields(DataInput in) throws IOException {
+			super.readFields(in);
+			//System.err.println("Term "+this.toString() + " read in "+ (this.getLength()+WritableUtils.getVIntSize(this.getLength())) + " bytes");
+			in.skipBytes(maxKeyWrittenSize - (this.getLength()+WritableUtils.getVIntSize(this.getLength())));
+		}
+
+		@Override
+		public void write(DataOutput out) throws IOException {
+			super.write(out);
+			out.write(ZERO_BUFFER, 0, maxKeyWrittenSize - (this.getLength()+WritableUtils.getVIntSize(this.getLength())));
+			//System.err.println("Term "+this.toString() + " written in "+ (this.getLength()+WritableUtils.getVIntSize(this.getLength())) + " bytes");
+		}
+	}
+	
+	final byte[] ZERO_BUFFER;
+	final int termLength;
+	final int maxKeyWrittenSize;
+	
+	public FixedSizeTextFactory(String _termLength)
+	{
+		this(Integer.parseInt(_termLength));
+	}
+	
+	public FixedSizeTextFactory(int _termLength)
+	{
+		this.termLength = _termLength; //TODO : consider non-utf terms - need to derive maximum size
+		final int maxTermBytes = 3*termLength; // worst case assumed here is 3 bytes per character; the vint prefix encodes this BYTE length
+		this.maxKeyWrittenSize = WritableUtils.getVIntSize(maxTermBytes) + maxTermBytes; // prefix sized for bytes, not for the character count
+		ZERO_BUFFER = new byte[maxKeyWrittenSize];
+	}
+	
+	public int getSize() {
+		
+		return maxKeyWrittenSize;
+	}
+
+	public Text newInstance() {
+		return new FixedSizeText();
+	}
+	
+	public static class Tester
+	{
+		static String makeStringOfLength(char c, int length)
+		{
+			StringBuilder s = new StringBuilder();
+			for(int i = 0; i<length;i++)
+				s.append(c);
+			return s.toString();
+		}
+		
+		@Test public void testVariousStrings() throws Exception
+		{
+			int length = 20;
+			FixedSizeWriteableFactory<Text> factory = new FixedSizeTextFactory(length);
+			int bytes = factory.getSize();
+			// every term, whatever its length, must serialise to exactly getSize() bytes and read back unchanged
+			String[] testStrings = {
+					"", "a", "abat", 
+					"1234567890", "123456789001234567890",
+					"\u0290\u0290", 
+					makeStringOfLength('\u0290', length),
+					makeStringOfLength('\u0690', length), makeStringOfLength('\u20ac', length)
+					};
+			for (String s : testStrings)
+			{
+				byte[] b = getBytes(factory, s);
+				assertEquals(bytes, b.length);
+				assertEquals(s, getString(factory, b));
+			}
+		}
+		
+		static String getString(FixedSizeWriteableFactory<Text> factory, byte[] b) throws Exception
+		{
+			ByteArrayInputStream buffer = new ByteArrayInputStream(b);
+			DataInputStream dis = new DataInputStream(buffer);
+			Text t = factory.newInstance();
+			t.readFields(dis);
+			return t.toString();
+		}
+		
+		static byte[] getBytes(FixedSizeWriteableFactory<Text> factory, String s) throws Exception
+		{
+			ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+			DataOutputStream dos = new DataOutputStream(buffer);
+			Text t = factory.newInstance();
+			t.set(s);
+			t.write(dos);
+			return buffer.toByteArray();
+		}
+	}
+}
Index: src/uk/ac/gla/terrier/structures/seralization/FixedSizeWriteableFactory.java
===================================================================
--- src/uk/ac/gla/terrier/structures/seralization/FixedSizeWriteableFactory.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/seralization/FixedSizeWriteableFactory.java	(revision 0)
@@ -0,0 +1,8 @@
+package uk.ac.gla.terrier.structures.seralization;
+
+/** A WriteableFactory that additionally reports a size via getSize().
+  * See FixedSizeTextFactory, whose instances pad every written record to exactly that many bytes. */
+public interface FixedSizeWriteableFactory<T> extends WriteableFactory<T>
+{
+    public int getSize();
+}
\ No newline at end of file
Index: src/uk/ac/gla/terrier/structures/seralization/WriteableFactory.java
===================================================================
--- src/uk/ac/gla/terrier/structures/seralization/WriteableFactory.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/seralization/WriteableFactory.java	(revision 0)
@@ -0,0 +1,6 @@
+package uk.ac.gla.terrier.structures.seralization;
+/** Creates new instances of T, e.g. an empty object that a reader then fills in via readFields(). */
+public interface WriteableFactory<T>
+{
+    public T newInstance();
+}
\ No newline at end of file
Index: src/uk/ac/gla/terrier/structures/LexiconUtil.java
===================================================================
--- src/uk/ac/gla/terrier/structures/LexiconUtil.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/LexiconUtil.java	(revision 0)
@@ -0,0 +1,22 @@
+package uk.ac.gla.terrier.structures;
+
+import java.util.Iterator;
+import java.util.Map;
+
+public class LexiconUtil {
+
+	/** Prints every entry of the named lexicon structure of the index to System.out, one "term,entry" line each. */
+	@SuppressWarnings("unchecked")
+	public static void printLexicon(Index index, String structureName) {
+		final Iterator<Map.Entry<String,LexiconEntry>> lexiconStream = 
+			(Iterator<Map.Entry<String,LexiconEntry>>)index.getIndexStructureInputStream(structureName);
+		try { // the finally clause releases the underlying stream even if iteration or printing fails
+			while (lexiconStream.hasNext()) {
+				final Map.Entry<String, LexiconEntry> lee = lexiconStream.next();
+				System.out.println(lee.getKey().toString()+","+lee.getValue().toString());
+			}
+		} finally {
+			if (lexiconStream instanceof Closeable) ((Closeable)lexiconStream).close();
+		}
+	}
+}
Index: src/uk/ac/gla/terrier/structures/UTFLexicon.java
===================================================================
--- src/uk/ac/gla/terrier/structures/UTFLexicon.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/UTFLexicon.java	(working copy)
@@ -1,479 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is Lexicon.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Gianni Amati <gba{a.}fub.it> (original author)
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
- *   Craig Macdonald <craigm{a.}.dcs.gla.ac.uk>
- */
-package uk.ac.gla.terrier.structures;
-import java.io.File;
-import java.io.IOException;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.StringTools;
-import uk.ac.gla.terrier.utility.io.RandomDataOutput;
-import org.apache.log4j.Logger;
-
-/**
- * The class that implements the lexicon structure. Apart from the lexicon file,
- * which contains the actual data about the terms, and takes its name from
- * ApplicationSetup.LEXICON_FILENAME, another file is created and
- * used, containing a mapping from the term's code to the offset of the term 
- * in the lexicon. The name of this file is given by 
- * ApplicationSetup.LEXICON_INDEX_FILENAME.
- * 
- * @see ApplicationSetup#LEXICON_FILENAME
- * @see ApplicationSetup#LEXICON_INDEX_FILENAME
- * @author Gianni Amati, Vassilis Plachouras, Craig Macdonald
- * @version $Revision: 1.17 $
- */
-public class UTFLexicon extends Lexicon {
-	/** The logger used */
-	private static Logger logger = Logger.getRootLogger();
-	/** The term represented as an array of bytes.*/
-	protected byte[] termCharacters;
-
-	/** 
-	 * The size in bytes of an entry in the lexicon file.
-	 * An entry corresponds to a string, an int (termCode), 
-	 * an int (docf), an int (tf), a long (the offset of the end 
-	 * of the term's entry in bytes in the inverted file) and
-	 * a byte (the offset in bits of the last byte of the term's entry 
-	 * in the inverted file.
-	 */
-	public static final int lexiconEntryLength =
-	
-		2+ //two bytes for length written by writeUTF
-		ApplicationSetup.STRING_BYTE_LENGTH //the byte representation of the string, ie 3* MAX_TERM_LENGTH
-		
-		+12 //the three integers
-		+8 //the long
-		+1; //the byte
-	
-	/** 
-	 * A default constructor.
-	 */
-	public UTFLexicon() {
-		super();
-		try {
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)UTFLexicon.lexiconEntryLength);
-			bufferInput.mark(3 * lexiconEntryLength);
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/output exception while opening for reading the lexicon file." +
-				" Stack trace follows",ioe);
-		}
-		inputStreamClass = UTFLexiconInputStream.class;
-	}
-    public UTFLexicon(String path, String prefix)
-    {
-        this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-    }
-
-	/**
-	 * Constructs an instace of Lexicon and opens
-	 * the corresponding file.
-	 * 
-	 * @param lexiconName the name of the lexicon file.
-	 */
-	public UTFLexicon(String lexiconName) {
-		super(lexiconName);
-		try {
-			numberOfLexiconEntries = (int) (lexiconFile.length() / (long)UTFLexicon.lexiconEntryLength);
-			bufferInput.mark(3 * lexiconEntryLength);
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/output exception while opening for reading the lexicon file. Stack trace follows",ioe);
-		}
-		inputStreamClass = UTFLexiconInputStream.class;
-	}
-
-	
-	/**
-	 * Finds the term given its term code.
-	 *
-	 * @return true if the term is found, else return false
-	 * @param _termId the term's identifier
-	 */
-	public boolean findTerm(int _termId) {
-		try {
-			idToOffsetFile.seek((long)_termId * 8L);
-			long lexiconOffset = idToOffsetFile.readLong();
-			if (lexiconOffset == 0) {
-				startOffset = 0;
-				startBitOffset = 0;
-				lexiconFile.seek(lexiconOffset);
-				
-				term = lexiconFile.readUTF();				
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			} else {
-				lexiconFile.seek(lexiconOffset - 9);
-				//goes to the lexicon offset minus the long offset and a byte
-				startOffset = lexiconFile.readLong();
-				startBitOffset = lexiconFile.readByte();
-				startBitOffset++;
-				if (startBitOffset == 8) {
-					startBitOffset = 0;
-					startOffset++;
-				}
-				term = lexiconFile.readUTF();				
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while reading the idToOffset file. Stack trace follows.",ioe);
-		}
-		return false;
-	}
-	/** 
-	 * Performs a binary search in the lexicon
-	 * in order to locate the given term.
-	 * If the term is located, the properties
-	 * termCharacters, documentFrequency,
-	 * termFrequency, startOffset, startBitOffset,
-	 * endOffset and endBitOffset contain the
-	 * values related to the term.
-	 * @param _term The term to search for.
-	 * @return true if the term is found, and false otherwise.
-	 */
-	public boolean findTerm(String _term) {
-		byte[] bt = new byte[ApplicationSetup.STRING_BYTE_LENGTH];
-		
-		//int termLength = ApplicationSetup.STRING_BYTE_LENGTH;			
-		//int _termId = 0;
-		long low = -1;
-		long high = numberOfLexiconEntries;
-		long i;
-		String currentTerm = null;
-		while (high-low>1) {
-			
-			i = (long)(high+low)/2;
-			try {
-				lexiconFile.seek((long)i * (long)lexiconEntryLength);
-				currentTerm = lexiconFile.readUTF();
-				//we don't need to take in the padding as we're seeking between entries
-			} catch (IOException ioe) {
-				logger.fatal(
-					"Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-			}
-			
-			if (_term.compareTo(currentTerm) < 1)
-				high = i;
-			else
-				low = i;
-		}
-		if (high == numberOfLexiconEntries)
-			return false;
-		try {
-			lexiconFile.seek((long)high * (long)lexiconEntryLength);
-			currentTerm = lexiconFile.readUTF();
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-		}	
-		
-		if (_term.compareTo(currentTerm) == 0) {
-			try {
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH- StringTools.utf8_length(currentTerm));
-				findTerm(lexiconFile.readInt());
-				return true;
-			}catch(IOException ioe) {
-				logger.fatal("Input/Output exception while reading from lexicon file. Stack trace follows.",ioe);
-			}
-		}
-		return false;
-	}
-
-	/**
-	 * Seeks the i-th entry of the lexicon.
-	 * TODO read a byte array from the file and decode it, 
-	 * 		instead of reading the different pieces of 
-	 *      information separately.
-	 * @param i The index of the entry we are looking for.
-	 * @return true if the entry was found, false otherwise.
-	 */
-	public boolean seekEntry(int i) {
-		try {
-			if (i > numberOfLexiconEntries)
-				return false;
-			if (i == 0) {
-				lexiconFile.seek((long)i * (long)lexiconEntryLength);
-				startOffset = 0;
-				startBitOffset = 0;
-				
-				term = lexiconFile.readUTF();
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-				
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			} else {
-				lexiconFile.seek(
-					(long)i * (long)lexiconEntryLength
-						- (long)lexiconEntryLength
-						+ 2L//two bytes for the string length written by writeUTF
-						+ (long)ApplicationSetup.STRING_BYTE_LENGTH
-						+ 12L);
-				startOffset = lexiconFile.readLong();
-				startBitOffset = lexiconFile.readByte();
-				startBitOffset++;
-				if (startBitOffset == 8) {
-					startBitOffset = 0;
-					startOffset++;
-				}
-
-				term = lexiconFile.readUTF();				
-				lexiconFile.readFully(bt, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-
-				termId = lexiconFile.readInt();
-				documentFrequency = lexiconFile.readInt();
-				termFrequency = lexiconFile.readInt();
-				endOffset = lexiconFile.readLong();
-				endBitOffset = lexiconFile.readByte();
-				return true;
-			}
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while reading the idToOffset file. " +
-				"Stack trace follows.",ioe);
-		}
-		return false;
-	}
-
-	/** Returns a LexiconEntry describing all the information in the lexicon about the term
-	  * denoted by termid
-	  * @param termid the termid of the term of interest
-	  * @return LexiconEntry all information about the term's entry in the lexicon. null if termid not found */
-	public LexiconEntry getLexiconEntry(int termid) {
-		/* TODO: improve this to the effectiveness level of getLexiconEntry() */
-		if (! findTerm(termid))
-			return null;
-		LexiconEntry le = new LexiconEntry();
-		le.termId = this.termId;
-		le.term = this.term.trim();
-		le.TF = this.termFrequency;
-		le.n_t = this.documentFrequency;
-		le.startOffset = this.startOffset;
-		le.startBitOffset = this.startBitOffset;
-		le.endOffset = this.endOffset;
-		le.endBitOffset = this.endBitOffset;
-		return le;
-	}
-	
-	/** Returns a LexiconEntry describing all the information in the lexicon about the term
-	  * denoted by _term
-	  * @param _term the String term that is of interest
-	  * @return LexiconEntry all information about the term's entry in the lexicon. null if termid not found */
-	public LexiconEntry getLexiconEntry(String _term) {
-		int low = -1;
-		int high = (int)numberOfLexiconEntries;
-		int i;
-		int compareStrings;
-		String term;
-		byte[] buffer = new byte[lexiconEntryLength+9]; //to get the start offsets as well
-		
-		if (USE_HASH) {
-			int firstChar = _term.charAt(0);
-			int[] boundaries = (int[])map.get(firstChar);
-			if (boundaries != null)
-			{
-				low = boundaries[0];
-				high = boundaries[1];
-			}
-			//System.out.println("lexicon use hash: " + low + " " + high);
-		}
-		
-		try {
-			while (high-low>1) {
-				
-				i = (high + low)/2;
-				if (i==0) {
-					lexiconFile.seek(0);
-					lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-					term = lexiconFile.readUTF();
-						//new String(buffer,0,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-				} else {
-					lexiconFile.seek((long)i * (long)lexiconEntryLength);
-					term = lexiconFile.readUTF();
-					//term = new String(buffer,9,ApplicationSetup.STRING_BYTE_LENGTH).trim();
-				}
-							
-				if ((compareStrings = _term.compareTo(term))< 0)
-					high = i;
-				else if (compareStrings > 0)
-					low = i;
-				else { //read the rest and return the data
-					if (i==0)
-					{
-						lexiconFile.seek(0);
-						lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-					}
-					else
-					{
-						lexiconFile.seek((long)i * (long)(lexiconEntryLength) -9);
-						lexiconFile.readFully(buffer, 0, lexiconEntryLength+9);
-					}
-					return getLexiconEntryFromBuffer(buffer, term, i);
-				}
-			}
-		
-			if (high == numberOfLexiconEntries)
-				return null;
-			
-			if (high == 0) {
-				lexiconFile.seek(0);
-				term = lexiconFile.readUTF();
-				lexiconFile.seek(0);
-				lexiconFile.readFully(buffer, 0, lexiconEntryLength);
-			} else {
-				lexiconFile.seek((long)high * (long)lexiconEntryLength);
-				term = lexiconFile.readUTF();
-				lexiconFile.seek((long)high * (long)(lexiconEntryLength) -9);
-				lexiconFile.readFully(buffer, 0, lexiconEntryLength+9);
-			}
-			
-			if (_term.compareTo(term) == 0) {
-				return getLexiconEntryFromBuffer(buffer, term, high);
-			}	
-		} catch(IOException ioe) {
-			logger.fatal("IOException while binary searching the lexicon: " , ioe);
-		}
-		return null;
-	}
-
-	protected LexiconEntry getLexiconEntryFromBuffer(byte[] buffer, String term, int index) {
-		int offset;
-		LexiconEntry lEntry = new LexiconEntry();
-		lEntry.term = term;
-		if (index==0) {
-			lEntry.startOffset = 0;
-			lEntry.startBitOffset = 0;
-			offset = ApplicationSetup.STRING_BYTE_LENGTH+2;
-		} else {
-			offset = 0;
-//			lEntry.startOffset =
-//				(((((((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 |
-//					   buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-
-			long startOffset = (buffer[offset++] & 0xff);
-			for (int j=0; j<7; j++)
-				startOffset = startOffset<<8 | (buffer[offset++] & 0xff);
-			lEntry.startOffset = startOffset;
-
-			
-			lEntry.startBitOffset = (byte)(buffer[offset++]&0xff);
-			if (++lEntry.startBitOffset == 8) {
-				lEntry.startBitOffset = 0;
-				lEntry.startOffset++;
-			}
-
-			offset += 2+ApplicationSetup.STRING_BYTE_LENGTH;
-		}
-		lEntry.termId = 
-			(((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-		lEntry.n_t =
-			(((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-		lEntry.TF =
-			(((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-		
-//		lEntry.endOffset = 
-//			(((((((buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 |
-//				   buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff) << 8 | buffer[offset++]&0xff;
-
-		long endOffset = (int)(buffer[offset++] & 0xff);
-		for (int j=0; j<7; j++)
-			endOffset = endOffset<<8 | (buffer[offset++] & 0xff);
-		lEntry.endOffset = endOffset;		
-		lEntry.endBitOffset = (byte)(buffer[offset]&0xff);
-		return lEntry;
-	}
-
-	
-	/**
-	 * In an already stored entry in the lexicon
-	 * file, the information about the term frequency,
-	 * the endOffset in bytes, and the endBitOffset in the last
-	 * byte, is updated. The term is specified by the index of the entry.
-	 *
-	 * @return true if the information is updated properly, 
-	 *         otherwise return false
-	 * @param i the i-th entry
-	 * @param frequency the term's Frequency
-	 * @param endOffset the offset of the ending byte in the inverted file
-	 * @param endBitOffset the offset in bits in the ending byte 
-	 *        in the term's entry in inverted file
-	 * @deprecated The Lexicon class is only used for reading the
-	 *             lexicon file, and not for writing any information.
-	 */
-	public boolean updateEntry(
-		int i,
-		int frequency,
-		long endOffset,
-		byte endBitOffset) {
-		if (! (lexiconFile instanceof RandomDataOutput))
-            return false;
-        RandomDataOutput _lexiconFile = (RandomDataOutput)lexiconFile;
-		try {
-			long lexiconOffset = (long)i * (long)lexiconEntryLength;
-			//we seek the offset where the frequency should be writen
-			_lexiconFile.seek(  //utf length, string max length, termid, tf 
-				lexiconOffset + 2+ ApplicationSetup.STRING_BYTE_LENGTH + 8);
-			_lexiconFile.writeInt(frequency);
-			_lexiconFile.writeLong(endOffset);
-			_lexiconFile.writeByte(endBitOffset);
-		} catch (IOException ioe) {
-			logger.fatal(
-				"Input/Output exception while updating the lexicon file. " +
-				"Stack trace follows.",ioe);
-		}
-		return false;
-	}
-
-    public static int numberOfEntries(File f)
-    {
-        return (int)(f.length()/ (long)lexiconEntryLength);
-    }
-
-    public static int numberOfEntries(String filename)
-    {
-        return numberOfEntries(new File(filename));
-    }
-
-}
Index: src/uk/ac/gla/terrier/structures/Index.java
===================================================================
--- src/uk/ac/gla/terrier/structures/Index.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/Index.java	(working copy)
@@ -377,7 +377,9 @@
 			String structureClassName = properties.getProperty("index."+structureName+".class");
 			if (structureClassName == null)
 			{
-				logger.error("This index ("+this.toString()+") doesnt have an index structure called "+ structureName);
+				logger.error("This index ("+this.toString()+") doesnt have an index structure called "+ structureName 
+						+ ": property index."+structureName+".class not found");
+				logger.error(properties.toString());
 				return null;//TODO exceptions?
 			}
 			//obtain the class definition for the index structure
@@ -385,7 +387,8 @@
 			try{
 				indexStructureClass = Class.forName(structureClassName, false, this.getClass().getClassLoader());
 			} catch (ClassNotFoundException cnfe) {
-				logger.error("This index ("+this.toString()+") references an unknown index structure class: "+structureName+ " looking for "+ structureClassName);
+				logger.error("ClassNotFoundException: This index ("+this.toString()+") references an unknown index structure class: "+structureName+ " looking for "+ structureClassName);
+				cnfe.printStackTrace();
 				return null;//TODO exceptions?
 			}
 
@@ -416,8 +419,20 @@
 						objs[i] = prefix;
 					else if (p.equals("index"))
 						objs[i] = this;
+					else if (p.equals("structureName"))
+					{
+						final String tmp = structureName;
+						objs[i] = tmp.replaceAll("-inputstream$", "");
+					}
 					else if (p.endsWith("-inputstream"))//no caching for input streams
 						 objs[i] = loadIndexStructure(p);
+					else if (p.matches("^\\$\\{.+\\}$"))//${name}: use the index property, else the ApplicationSetup property of the same name
+					{
+						String propertyName = p.substring(2,p.length()-1);
+						objs[i] = properties.getProperty(propertyName, ApplicationSetup.getProperty(propertyName, propertyName.equals("max.term.length") ? ""+20 : null));//only max.term.length keeps its historical default of 20
+						if (objs[i] == null)
+							throw new IllegalArgumentException("Property "+propertyName+" not found");
+					}
 					else
 						objs[i] = getIndexStructure(p);
 					i++;
@@ -492,7 +507,7 @@
 			}
 			try{
 				final OutputStream outputStream = Files.writeFileStream(propertiesFilename); 
-				properties.store(outputStream,"");
+				properties.store(outputStream,this.toString());
 				outputStream.close(); 
 			} catch (IOException ioe) {
 				logger.warn("Could not write to index properties at "+propertiesFilename + " - some changes may be lost", ioe);
@@ -517,9 +532,10 @@
 		return (DirectIndex)getIndexStructure("direct");
 	}
 	/** Return the Lexicon associated with this index */
-	public Lexicon getLexicon()
+	@SuppressWarnings("unchecked")
+	public Lexicon<String> getLexicon()
 	{
-		return (Lexicon)getIndexStructure("lexicon");
+		return (Lexicon<String>)getIndexStructure("lexicon");
 	}
 	/** Return the DocumentIndex associated with this index */
 	public DocumentIndex getDocumentIndex()
Index: src/uk/ac/gla/terrier/structures/ExpansionTerms.java
===================================================================
--- src/uk/ac/gla/terrier/structures/ExpansionTerms.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/ExpansionTerms.java	(working copy)
@@ -31,6 +31,8 @@
 import gnu.trove.TIntHashSet;
 import gnu.trove.TIntObjectHashMap;
 
+import java.util.Map;
+
 import org.apache.log4j.Logger;
 
 import uk.ac.gla.terrier.matching.MatchingQueryTerms;
@@ -55,7 +57,7 @@
 	/** The terms in the top-retrieval documents. */
 	protected TIntObjectHashMap<ExpansionTerm> terms;
 	/** The lexicon used for retrieval. */
-	protected Lexicon lexicon;
+	protected Lexicon<String> lexicon;
 	/** The number of documents in the collection. */
 	protected int numberOfDocuments;
 	/** The number of tokens in the collection. */
@@ -167,7 +169,7 @@
 	* @param totalLength The sum of the length of the top-retrieved documents.
 	* @param lexicon Lexicon The lexicon used for retrieval.
  	*/
-	public ExpansionTerms(CollectionStatistics collStats, double totalLength, Lexicon lexicon) {
+	public ExpansionTerms(CollectionStatistics collStats, double totalLength, Lexicon<String> lexicon) {
 		this(
 				collStats.getNumberOfDocuments(),
 				collStats.getNumberOfTokens(),
@@ -186,7 +188,7 @@
 			long numberOfTokens,
 			double averageDocumentLength,
 			double totalLength, 
-			Lexicon lexicon) {
+			Lexicon<String> lexicon) {
 		this.numberOfDocuments = numberOfDocuments;
 		this.numberOfTokens = numberOfTokens;
 		this.averageDocumentLength = averageDocumentLength;
@@ -255,10 +257,10 @@
 				}
 				
 				double TF = 0;
-				double Nt = 0;
-				lexicon.findTerm(allTerms[i].getTermID());
-				TF = lexicon.getTF();
-				Nt = lexicon.getNt();
+				//double Nt = 0;
+				EntryStatistics ts = lexicon.getLexiconEntry(allTerms[i].getTermID()).getValue();
+				TF = ts.getFrequency();
+				//Nt = ts.getDocumentFrequency();
 				allTerms[i].setWeightExpansion(QEModel.score(
 					allTerms[i].getWithinDocumentFrequency(),
 					TF
@@ -283,9 +285,9 @@
 				logger.info("parameter free query expansion.");
 			}
 		}
-		lexicon.findTerm(allTerms[posMaxWeight].termID);
 		if(logger.isDebugEnabled()){
-		logger.debug("term with the maximum weight: " + lexicon.getTerm() +
+			String term = lexicon.getLexiconEntry(allTerms[posMaxWeight].termID).getKey();
+			logger.debug("term with the maximum weight: " + term +
 				", normaliser: " + Rounding.toString(normaliser, 4));
 		}
 		THashSet<SingleTermQuery> expandedTerms = new THashSet<SingleTermQuery>();
@@ -303,8 +305,8 @@
 					allTerms[i] = temp;
 				}
 				
-				lexicon.findTerm(allTerms[i].getTermID());
-				final SingleTermQuery expandedTerm = new SingleTermQuery(lexicon.getTerm());//new TermTreeNode(lexicon.getTerm());
+				String term = lexicon.getLexiconEntry(allTerms[i].getTermID()).getKey();
+				final SingleTermQuery expandedTerm = new SingleTermQuery(term);
 				
 				expandedTerm.setWeight(allTerms[i].getWeightExpansion()/normaliser);
 				
@@ -323,11 +325,11 @@
 				if (weighedOriginalTermsCount==originalTerms.size())
 					break;
 				
-				lexicon.findTerm(allTerms[i].getTermID());
-				if (!originalTerms.contains(lexicon.getTerm()))
+				String term = lexicon.getLexiconEntry(allTerms[i].getTermID()).getKey();
+				if (!originalTerms.contains(term))
 					continue;
 				weighedOriginalTermsCount++;
-				final SingleTermQuery expandedTerm = new SingleTermQuery(lexicon.getTerm());//new TermTreeNode(lexicon.getTerm());
+				final SingleTermQuery expandedTerm = new SingleTermQuery(term);
 				expandedTerm.setWeight(allTerms[i].getWeightExpansion()/normaliser);
 				//expandedTerms[i].normalisedFrequency = 
 				//terms[i].getWeightExpansion()/normaliser;
@@ -350,7 +352,7 @@
 		this.originalTerms.clear();
 		for (int i=0; i<terms.length; i++){
 			this.originalTerms.add(terms[i]);
-			this.originalTermids.add(query.getTermCode(terms[i]));
+			this.originalTermids.add(query.getStatistics(terms[i]).getTermId());
 		}
 	}
 
@@ -367,9 +369,9 @@
 	 * @param model QueryExpansionModel the used query expansion model.
 	 * @return double the weight of the specified term.
 	 */
-	public double getExpansionWeight(String term, QueryExpansionModel model){
-		lexicon.findTerm(term);
-		return this.getExpansionWeight(lexicon.termId, model);
+	public double getExpansionWeight(String term, QueryExpansionModel model)
+	{
+		return this.getExpansionWeight(lexicon.getLexiconEntry(term).getTermId(), model);
 	}
 	
 	/**
@@ -377,9 +379,9 @@
 	 * @param term String the term to get the weight for.
 	 * @return double the weight of the specified term.
 	 */
-	public double getExpansionWeight(String term){
-		lexicon.findTerm(term);
-		return this.getExpansionWeight(lexicon.termId);
+	public double getExpansionWeight(String term)
+	{
+		return this.getExpansionWeight(lexicon.getLexiconEntry(term).getTermId());
 	}
 	/**
 	 * Returns the un-normalised weight of a given term.
@@ -396,8 +398,7 @@
 	 * @return double the frequency of the specified term in the top-ranked documents.
 	 */
 	public double getFrequency(String term){
-		lexicon.findTerm(term);
-		return this.getFrequency(lexicon.getTermId());
+		return this.getFrequency(lexicon.getLexiconEntry(term).getTermId());
 	}
 	
 	/**
@@ -452,10 +453,11 @@
 				}
 				
 				double TF = 0;
-				double Nt = 0;
-				lexicon.findTerm(allTerms[i].getTermID());
-				TF = lexicon.getTF();
-				Nt = lexicon.getNt();
+				//double Nt = 0;
+				LexiconEntry le = lexicon.getLexiconEntry(allTerms[i].getTermID()).getValue();
+				
+				TF = le.getFrequency();
+				//Nt = le.getDocumentFrequency();
 				allTerms[i].setWeightExpansion(QEModel.score(
 					allTerms[i].getWithinDocumentFrequency(),
 					TF
@@ -479,9 +481,10 @@
 				logger.info("parameter free query expansion.");
 			}
 		}
-		lexicon.findTerm(allTerms[posMaxWeight].termID);
+		
 		if(logger.isDebugEnabled()){
-			logger.debug("term with the maximum weight: " + lexicon.getTerm() +
+			String term = lexicon.getLexiconEntry(allTerms[posMaxWeight].termID).getKey();
+			logger.debug("term with the maximum weight: " + term +
 				", normaliser: " + Rounding.toString(normaliser, 4));
 		}
 		for (int i = 0; i < len; i++){
@@ -508,10 +511,11 @@
 		if (o != null)
 		{
 			double TF = 0;
-			double Nt = 0;
-			lexicon.findTerm(termId);
-			TF = lexicon.getTF();
-			Nt = lexicon.getNt();
+			//double Nt = 0;
+			Map.Entry<String,LexiconEntry> lse = lexicon.getLexiconEntry(termId);
+			TF = lse.getValue().getFrequency();
+			//Nt = lse.getValue().getDocumentFrequency();
+			
 			score = model.score(((ExpansionTerm)o).getWithinDocumentFrequency(),
 					TF,
 					this.totalDocumentLength,
Index: src/uk/ac/gla/terrier/structures/BitFilePosition.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BitFilePosition.java	(revision 0)
+++ src/uk/ac/gla/terrier/structures/BitFilePosition.java	(revision 0)
@@ -0,0 +1,7 @@
+package uk.ac.gla.terrier.structures;
+public interface BitFilePosition // a position held as a (bytes, bits) pair; presumably an offset into a bit-compressed file - TODO confirm against implementors
+{
+	public long getBytes(); // the bytes component of the position
+	public byte getBits(); // the bits component of the position; valid range is not stated here - TODO confirm (0-7?)
+	public void setPosition(long bytes, byte bits); // intended to set both components in a single call
+}
Index: src/uk/ac/gla/terrier/structures/UTFLexiconInputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/UTFLexiconInputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/UTFLexiconInputStream.java	(working copy)
@@ -1,173 +0,0 @@
-/*
- * Terrier - Terabyte Retriever 
- * Webpage: http://ir.dcs.gla.ac.uk/terrier 
- * Contact: terrier{a.}dcs.gla.ac.uk
- * University of Glasgow - Department of Computing Science
- * http://www.gla.ac.uk/
- * 
- * The contents of this file are subject to the Mozilla Public License
- * Version 1.1 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * The Original Code is LexiconInputStream.java.
- *
- * The Original Code is Copyright (C) 2004-2009 the University of Glasgow.
- * All Rights Reserved.
- *
- * Contributor(s):
- *   Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk> (original author) 
- *   Craig Macdonald <craigm{a.}.dcs.gla.ac.uk>
- */
-package uk.ac.gla.terrier.structures;
-import java.io.DataInput;
-import java.io.EOFException;
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-
-import uk.ac.gla.terrier.utility.ApplicationSetup;
-import uk.ac.gla.terrier.utility.StringTools;
-/**
- * This class implements an input stream for the lexicon structure.
- * @author Vassilis Plachouras, Craig Macdonald
- * @version $Revision: 1.16 $
- */
-public class UTFLexiconInputStream extends LexiconInputStream {
-	/** A zero buffer for writing to the file.*/
-	protected byte[] junkBuffer = new byte[ApplicationSetup.STRING_BYTE_LENGTH+2];
-	
-	/**
-	 * A default constructor.
-	 */
-	public UTFLexiconInputStream() {
-		super();
-		entrySize = UTFLexicon.lexiconEntryLength;
-		termCharacters = new byte[ApplicationSetup.STRING_BYTE_LENGTH +2];
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param filename java.lang.String the name of the lexicon file.
-	 */
-	public UTFLexiconInputStream(String filename) {
-		super(filename);
-		entrySize = UTFLexicon.lexiconEntryLength;
-		termCharacters = new byte[ApplicationSetup.STRING_BYTE_LENGTH +2];
-	}
-	/**
-	 * A constructor given the filename.
-	 * @param file java.io.File the name of the lexicon file.
-	 */
-	public UTFLexiconInputStream(File file) {
-		super(file);
-		entrySize = UTFLexicon.lexiconEntryLength;
-		termCharacters = new byte[ApplicationSetup.STRING_BYTE_LENGTH +2];
-	}
-	
-	public UTFLexiconInputStream(String path, String prefix) {
-		this(path + ApplicationSetup.FILE_SEPARATOR + prefix + ApplicationSetup.LEXICONSUFFIX);
-	}
-	
-	/** Read a lexicon from the specified input stream */
-	public UTFLexiconInputStream(DataInput in) {
-		super(in);
-		entrySize = UTFLexicon.lexiconEntryLength;
-	}
-
-	/**
-	 * Read the next lexicon entry, where the term is parsed as a string.
-	 * This method does NOT work with getTermCharacters() - use readNextEntryBytes()
-	 * iterator for that.
-	 * @return the number of bytes read if there is no error, 
-	 *		 otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 */
-	public int readNextEntry() throws IOException {
-		try {
-			startBitOffset = (byte) (endBitOffset + 1);
-			startOffset = endOffset;
-			if (startBitOffset == 8) {
-				startOffset = endOffset + 1;
-				startBitOffset = 0;
-			}
-			
-			term = lexiconStream.readUTF();
-			lexiconStream.readFully(junkBuffer, 0, ApplicationSetup.STRING_BYTE_LENGTH - StringTools.utf8_length(term));
-
-			termId = lexiconStream.readInt();
-			documentFrequency = lexiconStream.readInt();
-			termFrequency = lexiconStream.readInt();
-			endOffset = lexiconStream.readLong();
-			endBitOffset = lexiconStream.readByte();
-			numPointersRead += documentFrequency;
-			numTokensRead += termFrequency;
-			numTermsRead++;
-			return Lexicon.lexiconEntryLength;
-		} catch (EOFException eofe) {
-			return -1;
-		}
-	}
-
-	/**
-	 * Read the next lexicon entry, where the term is saved as a byte array. No attempt is
-	 * made to parse the byte array and the padding bytes into a String. Use this method when
-	 * you want to get the bytes of the string using getTermCharacters(). This method does
-	 * NOT work with getTerm()
-	 * @return the number of bytes read if there is no error, 
-	 *		 otherwise returns -1 in case of EOF
-	 * @throws java.io.IOException if an I/O error occurs
-	 */
-	public int readNextEntryBytes() throws IOException {
-		try {
-			startBitOffset = (byte) (endBitOffset + 1);
-			startOffset = endOffset;
-			if (startBitOffset == 8) {
-				startOffset = endOffset + 1;
-				startBitOffset = 0;
-			}
-
-			Arrays.fill(termCharacters, (byte)0);
-			lexiconStream.readFully(termCharacters, 0, ApplicationSetup.STRING_BYTE_LENGTH +2);
-
-			termId = lexiconStream.readInt();
-			documentFrequency = lexiconStream.readInt();
-			termFrequency = lexiconStream.readInt();
-			endOffset = lexiconStream.readLong();
-			endBitOffset = lexiconStream.readByte();
-			numPointersRead += documentFrequency;
-			numTokensRead += termFrequency;
-			numTermsRead++;
-			return Lexicon.lexiconEntryLength;
-		} catch (EOFException eofe) {
-			return -1;
-		}
-	}
-	/**
-	* Returns the number of entries in the lexicon file.
-	*/
-	public int numberOfEntries(){
-			return (int)(lexiconFilelength / UTFLexicon.lexiconEntryLength);
-	}
-
-	/**
-	 * Returns the string representation of the term.
-	 * @return the string representation of the already found term.
-	 */
-	public String getTerm() {
-		return term;
-	}
-	
-	/** 
-	 * Returns the bytes of the String. Only valid is readNextEntryByte was used.
-	 * @return the byte array holding the term's byte representation
-	 */
-	public byte[] getTermCharacters() {
-		return termCharacters;
-	}
-	
-}
Index: src/uk/ac/gla/terrier/structures/BlockInvertedIndexInputStream.java
===================================================================
--- src/uk/ac/gla/terrier/structures/BlockInvertedIndexInputStream.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/BlockInvertedIndexInputStream.java	(working copy)
@@ -26,10 +26,12 @@
  */
 package uk.ac.gla.terrier.structures;
 
-import java.io.IOException;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
 import gnu.trove.TIntArrayList;
-import uk.ac.gla.terrier.compression.BitIn;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+
 import uk.ac.gla.terrier.utility.FieldScore;
 
 /** Reads a BlockInvertedIndex as a stream
@@ -37,38 +39,18 @@
   * @since 2.0
   * @version $Revision: 1.4 $
   */
-public class BlockInvertedIndexInputStream extends InvertedIndexInputStream implements IndexConfigurable 
+public class BlockInvertedIndexInputStream extends InvertedIndexInputStream 
 {
     protected int DocumentBlockCountDelta = 1;
-	/** Make a new BlockInvertedIndexInputStream from the specified path/prefix combo. The LexiconInputStream
-	  * is required to determine the offsets and the document frequency - ie number of postings for
- 	  * each term. */
-	public BlockInvertedIndexInputStream(String path, String prefix, LexiconInputStream lis) throws IOException
-	{
-		super(path, prefix, lis);
-	}
-	
-	/** Make a new BlockInvertedIndexInputStream from the specified filename. The LexiconInputStream
-	  * is required to determine the offsets and the document frequency - ie number of postings for
- 	  * each term.
-	  * @param filename Location of the inverted file to open */
-	public BlockInvertedIndexInputStream(String filename, LexiconInputStream lis) throws IOException
-	{
-		super(filename, lis);
-	}
-
-	public BlockInvertedIndexInputStream(BitIn invFile, LexiconInputStream lis) throws IOException
-	{
-		super(invFile, lis);
-	}
 
-    /** let it know which index to use */
-    public void setIndex(Index i)
+    public BlockInvertedIndexInputStream(Index _index, String structureName, Iterator<Map.Entry<?, ? extends BitIndexPointer>> positions) throws IOException
     {
-        DocumentBlockCountDelta = i.getIntIndexProperty("blocks.invertedindex.countdelta", 1);
+    	super(_index, structureName, positions);
+    	DocumentBlockCountDelta = _index.getIntIndexProperty("blocks.invertedindex.countdelta", 1);
     }
 
-	protected int[][] getNextDocuments(int df, long endByteOffset, byte endBitOffset) throws IOException {
+    protected int[][] getNextDocuments(BitIndexPointer pointer) throws IOException {
+    	final int df = pointer.getNumberOfEntries();
 		final int fieldCount = FieldScore.FIELDS_COUNT;
 		final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
 		
@@ -144,7 +126,7 @@
 		try{
 		while((documents = getNextDocuments()) != null)
 		{
-			System.out.print("tid"+i);
+			System.out.print(i+"th term:");
 			int blockindex = 0;
 			for (int j = 0; j < documents[0].length; j++) {
 				System.out.print(
Index: src/uk/ac/gla/terrier/structures/merging/StructureMerger.java
===================================================================
--- src/uk/ac/gla/terrier/structures/merging/StructureMerger.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/merging/StructureMerger.java	(working copy)
@@ -30,25 +30,28 @@
 
 import java.io.IOException;
 import java.util.Date;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
 
-import uk.ac.gla.terrier.compression.BitOut;
 import uk.ac.gla.terrier.sorting.SortAscendingPairedVectors;
 import uk.ac.gla.terrier.sorting.SortAscendingTripleVectors;
+import uk.ac.gla.terrier.structures.Closeable;
 import uk.ac.gla.terrier.structures.DirectIndex;
 import uk.ac.gla.terrier.structures.DirectInvertedOutputStream;
 import uk.ac.gla.terrier.structures.DocumentIndexInputStream;
 import uk.ac.gla.terrier.structures.FilePosition;
 import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.InvertedIndex;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconOutputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexiconOutputStream;
 import uk.ac.gla.terrier.structures.indexing.DocumentIndexBuilder;
 import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 
-import org.apache.log4j.Logger;
-
 /**
  * This class merges the structures created by Terrier, so that
  * we use fewer and larger inverted and direct files.
@@ -63,8 +66,6 @@
 public class StructureMerger {
 	/** use UTF supporting lexicon */
 	protected final boolean UTFIndexing = Boolean.parseBoolean(ApplicationSetup.getProperty("string.use_utf", "false"));
-	/** build a lexicon hash */
-	protected boolean USE_HASH = Boolean.parseBoolean(ApplicationSetup.getProperty("lexicon.use.hash","true"));
 	
 	/** the logger used */
 	protected static Logger logger = Logger.getRootLogger();
@@ -128,34 +129,6 @@
 		numberOfTerms = 0;
 	}
 	
-	protected static String[] getIndexPathPrefix(String _IFfilename)
-	{
-		
-		String parts[] = _IFfilename.split(ApplicationSetup.FILE_SEPARATOR);
-		String path = _IFfilename.replaceFirst(parts[parts.length -1]+"$", ""); 
-		String prefix = parts[parts.length -1].replaceAll(ApplicationSetup.IFSUFFIX+"$", "");
-		return new String[]{path,prefix};
-	}
-	
-	/**
-	 * A constructor that sets the filenames of the inverted
-	 * files to merge
-	 * @param _srcfilename1 the first inverted file to merge
-	 * @param _srcfilename2 the second inverted file to merge
-	 * @deprecated
-	 */
-	public StructureMerger(String _srcfilename1, String _srcfilename2) {
-		String[] p1 = getIndexPathPrefix(_srcfilename1);
-		String[] p2 = getIndexPathPrefix(_srcfilename2);
-		srcIndex1 = Index.createIndex(p1[0], p1[1]);
-		srcIndex2 = Index.createIndex(p2[0], p2[1]);
-		
-		//invertedFile1 = _filename1;
-		//invertedFile2 = _filename2;
-		numberOfDocuments = 0;
-		numberOfPointers = 0;
-		numberOfTerms = 0;
-	}
 	
 	/**
 	 * Sets the number of bits to write or read for binary encoded numbers
@@ -165,16 +138,7 @@
 		binaryBits = bits;
 	}
 	
-	/**
-	 * Sets the output filename of the merged inverted file
-	 * @param _outputName the filename of the merged inverted file
-	 * @deprecated
-	 */
-	public void setOutputFilename(String _outputName) {
-		//invertedFileOutput = _outputName;
-		String[] p = getIndexPathPrefix(_outputName);
-		destIndex = Index.createNewIndex(p[0], p[1]);
-	}
+
 	
 	/**
 	 * Sets the output index. This index should have no documents
@@ -191,6 +155,7 @@
 	 * lexicon are ot correct. They will be updated only after creating the 
 	 * inverted file.
 	 */
+	@SuppressWarnings("unchecked")
 	protected void mergeInvertedFiles() {
 		try {
 			//getting the number of entries in the first document index, 
@@ -208,13 +173,24 @@
 				termcodeHashmap = new TIntIntHashMap();
 
 			//setting the input streams
-			LexiconInputStream lexInStream1 = (LexiconInputStream)srcIndex1.getIndexStructureInputStream("lexicon");
-			LexiconInputStream lexInStream2 = (LexiconInputStream)srcIndex2.getIndexStructureInputStream("lexicon");
+			Iterator<Map.Entry<String,LexiconEntry>> lexInStream1 = 
+				(Iterator<Map.Entry<String,LexiconEntry>>)srcIndex1.getIndexStructureInputStream("lexicon");
+			Iterator<Map.Entry<String,LexiconEntry>> lexInStream2 = 
+				(Iterator<Map.Entry<String,LexiconEntry>>)srcIndex2.getIndexStructureInputStream("lexicon");
+			
+			for(String property : new String[] {"index.lexicon-keyfactory.class", "index.lexicon-keyfactory.parameter_values",
+					"index.lexicon-keyfactory.parameter_types", "index.lexicon-valuefactory.class", "index.lexicon-valuefactory.parameter_values",
+					"index.lexicon-valuefactory.parameter_types"} )
+			{
+				destIndex.setIndexProperty(property, srcIndex1.getIndexProperty(property, null));
+			}
 			
-			LexiconOutputStream lexOutStream = UTFIndexing
-				? new UTFLexiconOutputStream(destIndex.getPath(), destIndex.getPrefix())
-				: new LexiconOutputStream(destIndex.getPath(), destIndex.getPrefix());
+			FixedSizeWriteableFactory<LexiconEntry> lvf = 
+				(FixedSizeWriteableFactory<LexiconEntry>)srcIndex1.getIndexStructure("lexicon-valuefactory");
 				
+			//setting the output stream
+			LexiconOutputStream<String> lexOutStream = 
+				new FSOMapFileLexiconOutputStream(destIndex, "lexicon", (Class <FixedSizeWriteableFactory<LexiconEntry>>) lvf.getClass());
 
 			int newCodes = (int)srcIndex1.getCollectionStatistics().getNumberOfUniqueTerms(); 
 			
@@ -227,7 +203,7 @@
 					(DirectInvertedOutputStream)invertedFileOutputStreamClass
 					.getConstructor(String.class,Integer.TYPE)
 					.newInstance(destIndex.getPath() + ApplicationSetup.FILE_SEPARATOR +  
-								destIndex.getPrefix() + ApplicationSetup.IFSUFFIX,
+								destIndex.getPrefix() + ".inverted.bf",
 								binaryBits);
 			} catch (Exception e) {
 				logger.error("Couldn't create specified DirectInvertedOutputStream", e);
@@ -237,162 +213,166 @@
 			//BitOut invertedOutput = new BitOutputStream(
 			//	);
 
-			int hasMore1 = -1;
-			int hasMore2 = -1;
+			boolean hasMore1 = false;
+			boolean hasMore2 = false;
 			String term1;
 			String term2;
+			Map.Entry<String,LexiconEntry> lee1 = null;
+			Map.Entry<String,LexiconEntry> lee2 = null;
+			hasMore1 = lexInStream1.hasNext();
+			if (hasMore1)
+				lee1 = lexInStream1.next();
+			hasMore2 = lexInStream2.hasNext();
+			if (hasMore2)
+				lee2 = lexInStream2.next();
+			while (hasMore1 && hasMore2) {
 		
-			hasMore1 = lexInStream1.readNextEntry();
-			hasMore2 = lexInStream2.readNextEntry();
-			while (hasMore1 >=0 && hasMore2 >= 0) {
-				term1 = lexInStream1.getTerm();
-				term2 = lexInStream2.getTerm();
+				term1 = lee1.getKey();
+				term2 = lee2.getKey();
 
 				int lexicographicalCompare = term1.compareTo(term2);
-				//System.err.println("Comparing "+lexInStream1.getTermId() +":"+ term1 + " with "+lexInStream2.getTermId()+ ":"+ term2 + " results="+lexicographicalCompare);
 				if (lexicographicalCompare < 0) {
 					
 					//write to inverted file as well.
-					int[][] docs = inverted1.getDocuments(lexInStream1.getTermId());
+					
+					int[][] docs = inverted1.getDocuments(lee1.getValue());
+					long startOffset = invOS.getByteOffset();
+					byte startBitOffset = invOS.getBitOffset();
+					
 					invOS.writePostings(docs, docs[0][0]+1);
-					//writePostings(docs, docs[0][0]+1, invertedOutput, binaryBits);
 					numberOfPointers+=docs[0].length;
-					long endOffset = invOS.getByteOffset();
-					byte endBitOffset = invOS.getBitOffset();
-					endBitOffset--;
-					if (endBitOffset < 0 && endOffset > 0) {
-						endBitOffset = 7;
-						endOffset--;
-					}
-					
-					lexOutStream.writeNextEntry(term1,
-									   lexInStream1.getTermId(),
-									   lexInStream1.getNt(),
-									   lexInStream1.getTF(),
-									   endOffset,
-									   endBitOffset);
-					hasMore1 = lexInStream1.readNextEntry();
+//					long endOffset = invOS.getByteOffset();
+//					byte endBitOffset = invOS.getBitOffset();
+//					endBitOffset--;
+//					if (endBitOffset < 0 && endOffset > 0) {
+//						endBitOffset = 7;
+//						endOffset--;
+//					}
+					lee1.getValue().setPosition(startOffset, startBitOffset);
+					lexOutStream.writeNextEntry(term1, lee1.getValue());
+					hasMore1 = lexInStream1.hasNext();
+					if (hasMore1)
+						lee1 = lexInStream1.next();
 				
 				} else if (lexicographicalCompare > 0) {
 					//write to inverted file as well.
-					int[][] docs = inverted2.getDocuments(lexInStream2.getTermId());
+					int[][] docs = inverted2.getDocuments(lee2.getValue());
+					long startOffset = invOS.getByteOffset();
+					byte startBitOffset = invOS.getBitOffset();
 					invOS.writePostings(docs, docs[0][0]+numberOfDocs1+1);
-					//writePostings(docs, docs[0][0]+numberOfDocs1+1, invertedOutput, binaryBits);
-					numberOfPointers+=docs[0].length;
-					long endOffset = invOS.getByteOffset();
-					byte endBitOffset = invOS.getBitOffset();
 					
-					endBitOffset--;
-					if (endBitOffset < 0 && endOffset > 0) {
-						endBitOffset = 7;
-						endOffset--;
-					}
+					numberOfPointers+=docs[0].length;
+//					long endOffset = invOS.getByteOffset();
+//					byte endBitOffset = invOS.getBitOffset();
+//					
+//					endBitOffset--;
+//					if (endBitOffset < 0 && endOffset > 0) {
+//						endBitOffset = 7;
+//						endOffset--;
+//					}
 					
 					int newCode = newCodes++;
 					if (keepTermCodeMap)
-						termcodeHashmap.put(lexInStream2.getTermId(), newCode);
-					
-					lexOutStream.writeNextEntry(term2,
-									   			newCode,
-									   			lexInStream2.getNt(),
-									   			lexInStream2.getTF(),
-									   			endOffset,
-									   			endBitOffset);
-					hasMore2 = lexInStream2.readNextEntry();
+						termcodeHashmap.put(lee2.getValue().getTermId(), newCode);
+					lee2.getValue().setTermId(newCode);
+					lee2.getValue().setPosition(startOffset, startBitOffset);
+					lexOutStream.writeNextEntry(term2, lee2.getValue());
+					hasMore2 = lexInStream2.hasNext();
+					if (hasMore2)
+						lee2 = lexInStream2.next();
 				} else {
 					//write to inverted file as well.
-					int[][] docs1 = inverted1.getDocuments(lexInStream1.getTermId());
-					int[][] docs2 = inverted2.getDocuments(lexInStream2.getTermId());
+					int[][] docs1 = inverted1.getDocuments(lee1.getValue());
+					int[][] docs2 = inverted2.getDocuments(lee2.getValue());
+					long startOffset = invOS.getByteOffset();
+					byte startBitOffset = invOS.getBitOffset();
 					invOS.writePostings(docs1, docs1[0][0]+1);
-					//writePostings(docs1, docs1[0][0]+1, invertedOutput, binaryBits);
 					numberOfPointers+=docs1[0].length;
 					invOS.writePostings(docs2, docs2[0][0] + numberOfDocs1 - docs1[0][docs1[0].length-1]);
-					//writePostings(docs2, docs2[0][0] + numberOfDocs1 - docs1[0][docs1[0].length-1], 
-					//					invertedOutput, binaryBits);
 					numberOfPointers+=docs2[0].length;
-					long endOffset = invOS.getByteOffset();
-					byte endBitOffset = invOS.getBitOffset();
-					endBitOffset--;
-					if (endBitOffset < 0 && endOffset > 0) {
-						endBitOffset = 7;
-						endOffset--;
-					}
+//					long endOffset = invOS.getByteOffset();
+//					byte endBitOffset = invOS.getBitOffset();
+//					endBitOffset--;
+//					if (endBitOffset < 0 && endOffset > 0) {
+//						endBitOffset = 7;
+//						endOffset--;
+//					}
+					
 					
-					int newCode = lexInStream1.getTermId();
+					lee1.getValue().setPosition(startOffset, startBitOffset);
+					int newCode = lee1.getValue().getTermId();
 					if (keepTermCodeMap)
-						termcodeHashmap.put(lexInStream2.getTermId(), newCode);
+						termcodeHashmap.put(lee2.getValue().getTermId(), newCode);
+					
+					lee1.getValue().add(lee2.getValue());
+					lexOutStream.writeNextEntry(term1, lee1.getValue());
+					hasMore1 = lexInStream1.hasNext();
+					if (hasMore1)
+						lee1 = lexInStream1.next();
 					
-					lexOutStream.writeNextEntry(term1,
-												newCode,
-												(lexInStream1.getNt() + lexInStream2.getNt()),
-												(lexInStream1.getTF() + lexInStream2.getTF()),
-												endOffset,
-												endBitOffset);
-					hasMore1 = lexInStream1.readNextEntry();
-					hasMore2 = lexInStream2.readNextEntry();
+					hasMore2 = lexInStream2.hasNext();
+					if (hasMore2)
+						lee2 = lexInStream2.next();
 				}
 			}
 			
-			if (hasMore1 >= 0) {
-				while (hasMore1 >= 0) {
-					
+			if (hasMore1) {
+				while (hasMore1) {
+					//lee1 already holds the unwritten entry read ahead by the loop above; advancing here would skip it
 					//write to inverted file as well.
-					int[][] docs = inverted1.getDocuments(lexInStream1.getTermId());
+					int[][] docs = inverted1.getDocuments(lee1.getValue());
+					long startOffset = invOS.getByteOffset();
+					byte startBitOffset = invOS.getBitOffset();
 					invOS.writePostings(docs, docs[0][0]+1);
-					//writePostings(docs, docs[0][0]+1, invertedOutput, binaryBits);
 					numberOfPointers+=docs[0].length;
-					long endOffset = invOS.getByteOffset();
-					byte endBitOffset = invOS.getBitOffset();
-					//long endOffset = invertedOutput.getByteOffset();
-					//byte endBitOffset = invertedOutput.getBitOffset();
-					endBitOffset--;
-					if (endBitOffset < 0 && endOffset > 0) {
-						endBitOffset = 7;
-						endOffset--;
-					}
-					
-					lexOutStream.writeNextEntry(lexInStream1.getTerm(),
-									   			lexInStream1.getTermId(),
-									   			lexInStream1.getNt(),
-									   			lexInStream1.getTF(),
-									   			endOffset,
-												endBitOffset);
-					hasMore1 = lexInStream1.readNextEntry();
+//					long endOffset = invOS.getByteOffset();
+//					byte endBitOffset = invOS.getBitOffset();
+//					endBitOffset--;
+//					if (endBitOffset < 0 && endOffset > 0) {
+//						endBitOffset = 7;
+//						endOffset--;
+//					}
+					lee1.getValue().setPosition(startOffset, startBitOffset);
+					lexOutStream.writeNextEntry(lee1.getKey(), lee1.getValue());
+					hasMore1 = lexInStream1.hasNext();
+					if (hasMore1)
+						lee1 = lexInStream1.next();
 				}
-			} else if (hasMore2 >= 0) {
-				while (hasMore2 >= 0) {
+			} else if (hasMore2) {
+				while (hasMore2) {
 					//write to inverted file as well.
-					int[][] docs = inverted2.getDocuments(lexInStream2.getTermId());
+					int[][] docs = inverted2.getDocuments(lee2.getValue());
+					long startOffset = invOS.getByteOffset();
+					byte startBitOffset = invOS.getBitOffset();
 					invOS.writePostings(docs, docs[0][0]+numberOfDocs1+1);
-					//writePostings(docs, docs[0][0]+numberOfDocs1+1, invertedOutput, binaryBits);
-					numberOfPointers+=docs[0].length;
-					long endOffset = invOS.getByteOffset();
-					byte endBitOffset = invOS.getBitOffset();
 					
-					//long endOffset = invertedOutput.getByteOffset();
-					//byte endBitOffset = invertedOutput.getBitOffset();
-					endBitOffset--;
-					if (endBitOffset < 0 && endOffset > 0) {
-						endBitOffset = 7;
-						endOffset--;
-					}
+					numberOfPointers+=docs[0].length;
+//					long endOffset = invOS.getByteOffset();
+//					byte endBitOffset = invOS.getBitOffset();
+//					endBitOffset--;
+//					if (endBitOffset < 0 && endOffset > 0) {
+//						endBitOffset = 7;
+//						endOffset--;
+//					}
 					
 					int newCode = newCodes++;
 					if (keepTermCodeMap)
-						termcodeHashmap.put(lexInStream2.getTermId(), newCode);
-						
-					lexOutStream.writeNextEntry(lexInStream2.getTerm(),
-												newCode,
-												lexInStream2.getNt(),
-												lexInStream2.getTF(),
-												endOffset,
-												endBitOffset);
-					hasMore2 = lexInStream2.readNextEntry();		
+						termcodeHashmap.put(lee2.getValue().getTermId(), newCode);
+					lee2.getValue().setTermId(newCode);
+					lee2.getValue().setPosition(startOffset, startBitOffset);
+					lexOutStream.writeNextEntry(lee2.getKey(), lee2.getValue());
+					hasMore2 = lexInStream2.hasNext();
+					if (hasMore2)
+						lee2 = lexInStream2.next();
 				}		
 			}
 			
-			lexInStream1.close();
-			lexInStream2.close();
+			if (lexInStream1 instanceof Closeable) {
+				((Closeable)lexInStream1).close();
+			}
+			if (lexInStream2 instanceof Closeable) {
+				((Closeable)lexInStream2).close();
+			}
 			
 
 			inverted1.close();
@@ -400,25 +380,16 @@
 			invOS.close();
 			
 			destIndex.setIndexProperty("num.Documents", ""+numberOfDocuments);
-			destIndex.setIndexProperty("num.Pointers", ""+lexOutStream.getNumberOfPointersWritten());
-			destIndex.setIndexProperty("num.Terms", ""+lexOutStream.getNumberOfTermsWritten());
-			destIndex.setIndexProperty("num.Tokens", ""+lexOutStream.getNumberOfTokensWritten());
-			destIndex.addIndexStructure("lexicon", UTFIndexing 
-					? "uk.ac.gla.terrier.structures.UTFLexicon" 
-					: "uk.ac.gla.terrier.structures.Lexicon");
-			destIndex.addIndexStructureInputStream("lexicon", UTFIndexing 
-					? "uk.ac.gla.terrier.structures.UTFLexiconInputStream" 
-					: "uk.ac.gla.terrier.structures.LexiconInputStream");
 			destIndex.addIndexStructure(
 					"inverted", 
 					invertedFileInputClass, 
-					"uk.ac.gla.terrier.structures.Lexicon,java.lang.String,java.lang.String", 
-					"lexicon,path,prefix");
+					"uk.ac.gla.terrier.structures.Index,java.lang.String", 
+					"index,structureName");
 			destIndex.addIndexStructureInputStream(
                     "inverted",
                     invertedFileInputStreamClass,
-                    "java.lang.String,java.lang.String,uk.ac.gla.terrier.structures.LexiconInputStream",
-                    "path,prefix,lexicon-inputstream");
+                    "uk.ac.gla.terrier.structures.Index,java.lang.String,java.util.Iterator",
+                    "index,structureName,lexicon-inputstream");
 			lexOutStream.close();
 			destIndex.flush();
 								
@@ -427,129 +398,6 @@
 		}
 	}
 
-		/**
-	 * Merges the two lexicons into one. After this stage, the offsets in the
-	 * lexicon are not correct. 
-	 */
-	protected void mergeLexicons() {
-		try {
-			//getting the number of entries in the first document index, 
-			//in order to assign the correct docids to the documents 
-			//of the second inverted file.			
-			
-			//creating a new map between new and old term codes
-			if (keepTermCodeMap)
-				termcodeHashmap = new TIntIntHashMap();
-			
-			//setting the input streams
-			final LexiconInputStream lexInStream1 = (LexiconInputStream)srcIndex1.getIndexStructureInputStream("lexicon");
-			final LexiconInputStream lexInStream2 = (LexiconInputStream)srcIndex2.getIndexStructureInputStream("lexicon");
-			
-			final LexiconOutputStream lexOutStream = UTFIndexing
-				? new UTFLexiconOutputStream(destIndex.getPath(), destIndex.getPrefix())
-				: new LexiconOutputStream(destIndex.getPath(), destIndex.getPrefix());
-				
-
-			int newCodes = (int)srcIndex1.getCollectionStatistics().getNumberOfUniqueTerms(); 
-
-			int hasMore1 = -1;
-			int hasMore2 = -1;
-			String term1;
-			String term2;
-		
-			hasMore1 = lexInStream1.readNextEntry();
-			hasMore2 = lexInStream2.readNextEntry();
-			while (hasMore1 >=0 && hasMore2 >= 0) {
-				term1 = lexInStream1.getTerm();
-				term2 = lexInStream2.getTerm();
-
-				int lexicographicalCompare = term1.compareTo(term2);
-				if (lexicographicalCompare < 0) {
-					
-					lexOutStream.writeNextEntry(term1,
-									   lexInStream1.getTermId(),
-									   lexInStream1.getNt(),
-									   lexInStream1.getTF(),
-									   0L,
-									   (byte)0);
-					hasMore1 = lexInStream1.readNextEntry();
-				
-				} else if (lexicographicalCompare > 0) {
-					int newCode = newCodes++;
-					if (keepTermCodeMap)
-						termcodeHashmap.put(lexInStream2.getTermId(), newCode);
-					
-					lexOutStream.writeNextEntry(term2,
-									   			newCode,
-									   			lexInStream2.getNt(),
-									   			lexInStream2.getTF(),
-									   			0L,
-									   			(byte)0);
-					hasMore2 = lexInStream2.readNextEntry();
-				} else {
-					int newCode = lexInStream1.getTermId();
-					if (keepTermCodeMap)
-						termcodeHashmap.put(lexInStream2.getTermId(), newCode);
-					
-					lexOutStream.writeNextEntry(term1,
-												newCode,
-												(lexInStream1.getNt() + lexInStream2.getNt()),
-												(lexInStream1.getTF() + lexInStream2.getTF()),
-												0L,
-												(byte)0);
-					hasMore1 = lexInStream1.readNextEntry();
-					hasMore2 = lexInStream2.readNextEntry();
-				}
-			}
-			
-			if (hasMore1 >= 0) {
-				while (hasMore1 >= 0) {
-									
-					lexOutStream.writeNextEntry(lexInStream1.getTerm(),
-									   			lexInStream1.getTermId(),
-									   			lexInStream1.getNt(),
-									   			lexInStream1.getTF(),
-									   			0L,
-												(byte)0);
-					hasMore1 = lexInStream1.readNextEntry();
-				}
-			} else if (hasMore2 >= 0) {
-				while (hasMore2 >= 0) {
-					int newCode = newCodes++;
-					if  (keepTermCodeMap)
-						termcodeHashmap.put(lexInStream2.getTermId(), newCode);
-						
-					lexOutStream.writeNextEntry(lexInStream2.getTerm(),
-												newCode,
-												lexInStream2.getNt(),
-												lexInStream2.getTF(),
-												0L,
-												(byte)0);
-					hasMore2 = lexInStream2.readNextEntry();		
-				}		
-			}
-			
-			lexInStream1.close();
-			lexInStream2.close();
-			
-			
-			destIndex.setIndexProperty("num.Documents", ""+numberOfDocuments);
-			destIndex.setIndexProperty("num.Pointers", ""+lexOutStream.getNumberOfPointersWritten());
-			destIndex.setIndexProperty("num.Terms", ""+lexOutStream.getNumberOfTermsWritten());
-			destIndex.setIndexProperty("num.Tokens", ""+lexOutStream.getNumberOfTokensWritten());
-			destIndex.addIndexStructure("lexicon", UTFIndexing 
-					? "uk.ac.gla.terrier.structures.UTFLexicon" 
-					: "uk.ac.gla.terrier.structures.Lexicon");
-			destIndex.addIndexStructureInputStream("lexicon", UTFIndexing 
-					? "uk.ac.gla.terrier.structures.UTFLexiconInputStream" 
-					: "uk.ac.gla.terrier.structures.LexiconInputStream");
-			lexOutStream.close();
-			destIndex.flush();
-
-		} catch(IOException ioe) {
-			logger.error("IOException while merging lexicons.", ioe);
-		}
-	}
 
 	/**
 	 * Merges the two direct files and the corresponding document id files.
@@ -710,17 +558,7 @@
 	 * creates the final term code to offset file, and the lexicon hash if enabled.
 	 */
 	protected void createLexidFile() {
-		try {
-			LexiconBuilder.createLexiconIndex(destIndex);
-		} catch(IOException ioe) {
-			logger.error("IOException while creating lexid file.", ioe);
-		}
-		if (USE_HASH)
-			try{
-				LexiconBuilder.createLexiconHash(destIndex);
-			} catch (IOException ioe) {
-				logger.error("IOException while creating lexicon hash file", ioe);
-			}
+		LexiconBuilder.optimise(destIndex, "lexicon");
 	}
 	
 	/**
@@ -743,7 +581,7 @@
 		}
 		else if (bothLexicon)
 		{
-			mergeLexicons();
+			new LexiconMerger(srcIndex1, srcIndex2, destIndex).mergeLexicons();
 			t2 = System.currentTimeMillis();
     	    logger.info("merged lexicons in " + ((t2-t1)/1000.0d));
 		}
@@ -804,7 +642,8 @@
 		long start = System.currentTimeMillis();
 		logger.info("started at " + (new Date()));
 		if (ApplicationSetup.getProperty("merger.onlylexicons","false").equals("true")) {
-			sMerger.mergeLexicons();
+			logger.error("merger.onlylexicons is no longer handled by StructureMerger - use LexiconMerger instead");
+			return;
 		} else if (ApplicationSetup.getProperty("merger.onlydocids","false").equals("true")) {
 			sMerger.mergeDocumentIndexFiles();
 		} else {
@@ -819,84 +658,7 @@
 		logger.info("time elapsed: " + ((end-start)*1.0d/1000.0d) + " sec.");
 	}
 
-	/**
-	 * Writes the given postings to a bit file. Depending on 
-	 * the value of the field binaryBits, this method will call the 
-	 * appropriate method writeToInvertedFileFields, or
-	 * writeToInvertedFileNoFields.
-	 * @param postings the postings list to write.
-	 * @param firstId the first identifier to write. This can be 
-	 *        an id plus one, or the gap of the current id and the previous one.
-	 * @param output the output bit file.
-	 * @deprecated Please use DirectInvertedOutputStream instead
-	 */
-	public static void writePostings(int[][] postings, int firstId, BitOut output, int binaryBits)
-			throws IOException {
-		if (binaryBits>0) 
-			writeFieldPostings(postings, firstId, output, binaryBits);
-		else 
-			writeNoFieldPostings(postings, firstId, output);
-	}
-	
-	/**
-	 * Writes the given postings to a bit file. This method assumes that
-	 * field information is available as well.
-	 * @param postings the postings list to write.
-	 * @param firstId the first identifier to write. This can be 
-	 *        an id plus one, or the gap of the current id and the previous one.
-	 * @param output the output bit file.
-	 * @deprecated use DirectInvertedIndexOutputStream
-	 */
-	public static void  writeFieldPostings(int[][] postings, int firstId, BitOut output, int binaryBits) 
-			throws IOException {
-		
-		//local variables in order to reduce the number
-		//of times we need to access a two-dimensional array
-		final int[] postings0 = postings[0];
-		final int[] postings1 = postings[1];
-		final int[] postings2 = postings[2];
-		
-		//write the first entry
-		output.writeGamma(firstId);
-		output.writeUnary(postings1[0]);
-		output.writeBinary(binaryBits, postings2[0]);
-	
-		final int length = postings0.length;
-		for (int k = 1; k < length; k++) {
-			output.writeGamma(postings0[k] - postings0[k - 1]);
-			output.writeUnary(postings1[k]);
-			output.writeBinary(binaryBits, postings2[k]);
-		}
-	}
-	
-	/**
-	 * Writes the given postings to a bit file. This method assumes that
-	 * field information is not available.
-	 * @param postings the postings list to write.
-	 * @param firstId the first identifier to write. This can be 
-	 *        an id plus one, or the gap of the current id and the previous one.
-	 * @param output the output bit file.
-	 * @throws IOException if an error occurs during writing to a file.
-	 * @deprecated use DirectInvertedIndexOutputStream
-	 */
-	public static void writeNoFieldPostings(int[][] postings, int firstId, BitOut output) 
-			throws IOException {
 
-		//local variables in order to reduce the number
-		//of times we need to access a two-dimensional array
-		final int[] postings0 = postings[0];
-		final int[] postings1 = postings[1];
-		
-		//write the first entry
-		output.writeGamma(firstId);
-		output.writeUnary(postings1[0]);
-	
-		final int length = postings[0].length;
-		for (int k = 1; k < length; k++) {
-			output.writeGamma(postings0[k] - postings0[k - 1]);
-			output.writeUnary(postings1[k]);
-		}
-	}
 
 
 }
Index: src/uk/ac/gla/terrier/structures/merging/LexiconMerger.java
===================================================================
--- src/uk/ac/gla/terrier/structures/merging/LexiconMerger.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/merging/LexiconMerger.java	(working copy)
@@ -29,14 +29,19 @@
 
 import java.io.IOException;
 import java.util.Date;
+import java.util.Iterator;
+import java.util.Map;
 
+import org.apache.hadoop.io.Text;
 import org.apache.log4j.Logger;
 
+import uk.ac.gla.terrier.structures.Closeable;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.structures.LexiconOutputStream;
-import uk.ac.gla.terrier.structures.UTFLexiconOutputStream;
+import uk.ac.gla.terrier.structures.FSOMapFileLexiconOutputStream;
 import uk.ac.gla.terrier.structures.indexing.LexiconBuilder;
+import uk.ac.gla.terrier.structures.seralization.FixedSizeWriteableFactory;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 
 /**
@@ -74,125 +79,115 @@
 	 * lexicon are not correct. They will be updated only after creating the 
 	 * inverted file.
 	 */
+	@SuppressWarnings("unchecked")
 	public void mergeLexicons() {
 		try {
 			
 			//setting the input streams
-			final LexiconInputStream lexInStream1 = (LexiconInputStream)srcIndex1.getIndexStructureInputStream("lexicon");
-			final LexiconInputStream lexInStream2 = (LexiconInputStream)srcIndex2.getIndexStructureInputStream("lexicon");
+			Iterator<Map.Entry<String,LexiconEntry>> lexInStream1 = 
+				(Iterator<Map.Entry<String,LexiconEntry>>)srcIndex1.getIndexStructureInputStream("lexicon");
+			Iterator<Map.Entry<String,LexiconEntry>> lexInStream2 = 
+				(Iterator<Map.Entry<String,LexiconEntry>>)srcIndex2.getIndexStructureInputStream("lexicon");
+			
+			
+			destIndex.setIndexProperty("lexicon-keyfactory", srcIndex1.getIndexProperty("lexicon-keyfactory", null));
+			destIndex.setIndexProperty("lexicon-valuefactory", srcIndex1.getIndexProperty("lexicon-valuefactory", null));
 			
 		
 			//setting the output stream
-			LexiconOutputStream lexOutStream = UTFIndexing
-				? new UTFLexiconOutputStream(destIndex.getPath(), destIndex.getPrefix())
-				: new LexiconOutputStream(destIndex.getPath(), destIndex.getPrefix());
+			LexiconOutputStream<String> lexOutStream = new FSOMapFileLexiconOutputStream(
+					destIndex.getPath(), destIndex.getPrefix(), 
+					"lexicon", 
+					(FixedSizeWriteableFactory<Text>)destIndex.getIndexStructure("lexicon-keyfactory"));
 			
-			int hasMore1 = -1;
-			int hasMore2 = -1;
+			boolean hasMore1 = false;
+			boolean hasMore2 = false;
 			String term1;
 			String term2;
 
 			int termId = 0;
 		
-			hasMore1 = lexInStream1.readNextEntry();
-			hasMore2 = lexInStream2.readNextEntry();
-			while (hasMore1 >=0 && hasMore2 >= 0) {
-				term1 = lexInStream1.getTerm();
-				term2 = lexInStream2.getTerm();
-				//System.out.println("term1 : " + term1 + "with id " + lexInStream1.getTermId());
-				//System.out.println("term2 : " + term2 + "with id " + lexInStream2.getTermId());
+			//read one entry ahead from each lexicon: lee1/lee2 always hold the
+			//next entry that has not been written yet, and an iterator is only
+			//advanced once its current entry has been written out
+			Map.Entry<String,LexiconEntry> lee1 = null;
+			Map.Entry<String,LexiconEntry> lee2 = null;
+			hasMore1 = lexInStream1.hasNext();
+			if (hasMore1)
+				lee1 = lexInStream1.next();
+			hasMore2 = lexInStream2.hasNext();
+			if (hasMore2)
+				lee2 = lexInStream2.next();
+			while (hasMore1 && hasMore2) {
+				term1 = lee1.getKey();
+				term2 = lee2.getKey();
 				int lexicographicalCompare = term1.compareTo(term2);
 				if (lexicographicalCompare < 0) {
-					
-					lexOutStream.writeNextEntry(term1,
-									   termId,
-									   lexInStream1.getNt(),
-									   lexInStream1.getTF(),
-									   0L,
-									   (byte)0);
+					lee1.getValue().setTermId(termId);
+					lee1.getValue().setPosition(0, (byte)0);
+					lexOutStream.writeNextEntry(term1, lee1.getValue());
 					termId++;
-					hasMore1 = lexInStream1.readNextEntry();
+					hasMore1 = lexInStream1.hasNext();
+					if (hasMore1)
+						lee1 = lexInStream1.next();
 				
 				} else if (lexicographicalCompare > 0) {
-					
-					lexOutStream.writeNextEntry(term2,
-									   			termId,
-									   			lexInStream2.getNt(),
-									   			lexInStream2.getTF(),
-									   			0L,
-									   			(byte)0);
+					lee2.getValue().setTermId(termId);
+					lee2.getValue().setPosition(0, (byte)0);
+					lexOutStream.writeNextEntry(term2, lee2.getValue());
 					termId++;
-					hasMore2 = lexInStream2.readNextEntry();
+					hasMore2 = lexInStream2.hasNext();
+					if (hasMore2)
+						lee2 = lexInStream2.next();
 				} else {
-					lexOutStream.writeNextEntry(term1,
-												termId,
-												(lexInStream1.getNt() + lexInStream2.getNt()),
-												(lexInStream1.getTF() + lexInStream2.getTF()),
-												0L,
-												(byte)0);
-					hasMore1 = lexInStream1.readNextEntry();
-					hasMore2 = lexInStream2.readNextEntry();
+					lee1.getValue().setTermId(termId);
+					lee1.getValue().setPosition(0, (byte)0);
+					lee1.getValue().add(lee2.getValue());
+					lexOutStream.writeNextEntry(term1, lee1.getValue());
+					hasMore1 = lexInStream1.hasNext();
+					if (hasMore1)
+						lee1 = lexInStream1.next();
+					hasMore2 = lexInStream2.hasNext();
+					if (hasMore2)
+						lee2 = lexInStream2.next();
 					termId++;
 				}
 			}
 			
-			if (hasMore1 >= 0) {
-				while (hasMore1 >= 0) {
-					lexOutStream.writeNextEntry(lexInStream1.getTerm(),
-									   			termId,
-									   			lexInStream1.getNt(),
-									   			lexInStream1.getTF(),
-									   			0L,
-												(byte)0);
-					hasMore1 = lexInStream1.readNextEntry();
+			if (hasMore1) {
+				while (hasMore1) {
+					lee1.getValue().setTermId(termId);
+					lee1.getValue().setPosition(0, (byte)0);
+					lexOutStream.writeNextEntry(lee1.getKey(), lee1.getValue());
+					hasMore1 = lexInStream1.hasNext();
+					if (hasMore1)
+						lee1 = lexInStream1.next();
 					termId++;
 				}
-			} else if (hasMore2 >= 0) {
-				while (hasMore2 >= 0) {
-					lexOutStream.writeNextEntry(lexInStream2.getTerm(),
-												termId,
-												lexInStream2.getNt(),
-												lexInStream2.getTF(),
-												0L,
-												(byte)0);
-					hasMore2 = lexInStream2.readNextEntry();
+			} else if (hasMore2) {
+				while (hasMore2) {
+					lee2.getValue().setTermId(termId);
+					lee2.getValue().setPosition(0, (byte)0);
+					lexOutStream.writeNextEntry(lee2.getKey(), lee2.getValue());
+					hasMore2 = lexInStream2.hasNext();
+					if (hasMore2)
+						lee2 = lexInStream2.next();
 					termId++;
 				}		
 			}
 			
-			lexInStream1.close();
-			lexInStream2.close();
-			destIndex.setIndexProperty("num.Pointers", ""+lexOutStream.getNumberOfPointersWritten());
-			destIndex.setIndexProperty("num.Terms", ""+lexOutStream.getNumberOfTermsWritten());
-			destIndex.setIndexProperty("num.Tokens", ""+lexOutStream.getNumberOfTokensWritten());
-			destIndex.addIndexStructure("lexicon", UTFIndexing
-					? "uk.ac.gla.terrier.structures.UTFLexicon"
-					: "uk.ac.gla.terrier.structures.Lexicon");
-			destIndex.addIndexStructureInputStream("lexicon", UTFIndexing
-					? "uk.ac.gla.terrier.structures.UTFLexiconInputStream"
-					: "uk.ac.gla.terrier.structures.LexiconInputStream");
+			if (lexInStream1 instanceof Closeable) {
+				((Closeable)lexInStream1).close();
+			}
+			if (lexInStream2 instanceof Closeable) {
+				((Closeable)lexInStream2).close();
+			}
 			lexOutStream.close();
+			LexiconBuilder.optimise(destIndex, "lexicon");
 			destIndex.flush();
 		} catch(IOException ioe) {
 			logger.error("IOException while merging lexicons.", ioe);
 		}
-		// create an empty lexid file
-		//try{
-		//	BufferedWriter bw = new BufferedWriter(Files.writeFileWriter(
-		//			this.lexiconFileOutput+"id"));
-		//	bw.write(" ");
-		//	bw.close();
-		//}
-		//catch(IOException e){
-		//	e.printStackTrace();
-		//}
-		try{
-			LexiconBuilder.createLexiconIndex(destIndex);
-			if (USE_HASH)
-				LexiconBuilder.createLexiconHash(destIndex);
-		} catch (IOException ioe) {
-			logger.warn("Problems writing lexicon lexid or lexicon hash", ioe);
-		}
 	}
 	public static void main(String[] args) {
 
Index: src/uk/ac/gla/terrier/structures/merging/BlockStructureMerger.java
===================================================================
--- src/uk/ac/gla/terrier/structures/merging/BlockStructureMerger.java	(revision 2526)
+++ src/uk/ac/gla/terrier/structures/merging/BlockStructureMerger.java	(working copy)
@@ -26,8 +26,8 @@
 package uk.ac.gla.terrier.structures.merging;
 import java.io.IOException;
 import java.util.Date;
+
 import uk.ac.gla.terrier.compression.BitOut;
-import uk.ac.gla.terrier.compression.BitOutputStream;
 import uk.ac.gla.terrier.sorting.SortAscendingQuadrupleVectors;
 import uk.ac.gla.terrier.sorting.SortAscendingQuintupleVectors;
 import uk.ac.gla.terrier.structures.BlockDirectInvertedOutputStream;
@@ -53,22 +53,7 @@
  */
 public class BlockStructureMerger extends StructureMerger {
 	
-	/**
-	 * A constructor that sets the filenames of the inverted
-	 * files to merge
-	 * @param _filename1 the first inverted file to merge
-	 * @param _filename2 the second inverted file to merge
-	 * @deprecated
-	 */
-	public BlockStructureMerger(String _filename1, String _filename2) {
-		super(_filename1, _filename2);
-		directFileOutputStreamClass = BlockDirectInvertedOutputStream.class;
-		directFileInputClass = "uk.ac.gla.terrier.structures.BlockDirectIndex";
-		directFileInputStreamClass = "uk.ac.gla.terrier.structures.BlockDirectIndexInputStream";
-		invertedFileOutputStreamClass = BlockDirectInvertedOutputStream.class;
-		invertedFileInputClass = "uk.ac.gla.terrier.structures.BlockInvertedIndex";
-		invertedFileInputStreamClass = "uk.ac.gla.terrier.structures.BlockInvertedIndexInputStream";
-	}
+	
 	
 	public BlockStructureMerger(Index _srcIndex1, Index _srcIndex2, Index _destIndex)
 	{
@@ -81,15 +66,7 @@
 		invertedFileInputStreamClass = "uk.ac.gla.terrier.structures.BlockInvertedIndexInputStream";
 	}
 
-	/** write Block postings.
-	  * @deprecated Use BlockDirectInvertedOutputStream instead */
-	public static void writeBlockPostings(int[][] postings, int firstId, BitOutputStream output, int binaryBits)
-            throws IOException {
-        if (binaryBits>0)
-            writeFieldPostings(postings, firstId, output, binaryBits);
-        else
-            writeNoFieldPostings(postings, firstId, output);
-    }
+	
 	
 	
 	/**
@@ -268,7 +245,8 @@
 		long start = System.currentTimeMillis();
 		logger.info("started at " + (new Date()));
 		if (ApplicationSetup.getProperty("merger.onlylexicons","false").equals("true")) {
-			sMerger.mergeLexicons();
+			System.err.println("Use LexiconMerger");
+			return;
 		} else if (ApplicationSetup.getProperty("merger.onlydocids","false").equals("true")) {
 			sMerger.mergeDocumentIndexFiles();
 		} else {
@@ -280,104 +258,5 @@
 		logger.info("time elapsed: " + ((end-start)*1.0d/1000.0d) + " sec.");
 	}
 	
-	
-	
-	
-	
-	
-	
-	
-	
-	
-	
-	
-	/** write Block postings with fields.
-	* @deprecated Use BlockDirectInvertedOutputStream instead */	
-	public static void  writeFieldPostings(int[][] postings, int firstId, final BitOutputStream output, final int binaryBits)
-	throws IOException {
-
-		//local variables in order to reduce the number
-		//of times we need to access a two-dimensional array
-		final int[] postings0 = postings[0];
-		final int[] postings1 = postings[1];
-		final int[] postings2 = postings[2];
-		final int[] postings3 = postings[3];
-		final int[] postings4 = postings[4];
-		
-		//write the first posting from the term's postings list
-		output.writeGamma(firstId);						//write document id 
-		output.writeUnary(postings1[0]);    			//write frequency
-		output.writeBinary(binaryBits, postings2[0]);	//write fields if binaryBits>0
-		int blockIndex = 0;								//the index of the current block id
-		int blockFrequency = postings3[0];				//the number of block ids to write
-		output.writeUnary(blockFrequency);    			//write block frequency
-		output.writeGamma(postings4[blockIndex]+1);	//write the first block id
-		blockIndex++;									//move to the next block id
-		for (int i=1; i<blockFrequency; i++) {			//write the next blockFrequency-1 ids
-			//write the gap between consequtive block ids
-			output.writeGamma(postings4[blockIndex]-postings4[blockIndex-1]);
-			blockIndex++;
-		}
-		
-		//write the rest of the postings from the term's postings list
-		final int length = postings[0].length;
-		for (int k = 1; k < length; k++) {
-			output.writeGamma(postings0[k] - postings0[k - 1]);	//write gap of document ids
-			output.writeUnary(postings1[k]);					//write term frequency
-			output.writeBinary(binaryBits, postings2[k]);		//write fields if binaryBits>0
-			blockFrequency = postings3[k];						//number of block ids to write
-			output.writeUnary(blockFrequency);					//write block frequency
-			output.writeGamma(postings4[blockIndex]+1);			//write the first block id
-			blockIndex++;										//move to the next block id
-			for (int i=1; i<blockFrequency; i++) {
-				//write the gap between consequtive block ids
-				output.writeGamma(postings4[blockIndex]-postings4[blockIndex-1]);
-				blockIndex++;
-			}
-		}
-	}
-
-	/** write Block postings with fields.
-	* @deprecated Use BlockDirectInvertedOutputStream instead */	
-	public static void writeNoFieldPostings(int[][] postings, int firstId, final BitOutputStream output) 
-		throws IOException {
-		
-		//local variables in order to reduce the number
-		//of times we need to access a two-dimensional array
-		final int[] postings0 = postings[0];
-		final int[] postings1 = postings[1];
-		final int[] postings3 = postings[3];
-		final int[] postings4 = postings[4];
-		
-		//write the first posting from the term's postings list
-		output.writeGamma(firstId);						//write document id 
-		output.writeUnary(postings1[0]);    			//write frequency
-		int blockIndex = 0;								//the index of the current block id
-		int blockFrequency = postings3[0];				//the number of block ids to write
-		output.writeUnary(blockFrequency);    			//write block frequency
-		output.writeGamma(postings4[blockIndex]+1);		//write the first block id
-		blockIndex++;									//move to the next block id
-		for (int i=1; i<blockFrequency; i++) {			//write the next blockFrequency-1 ids
-			//write the gap between consequtive block ids
-			output.writeGamma(postings4[blockIndex]-postings4[blockIndex-1]);
-			blockIndex++;
-		}
-		
-		//write the rest of the postings from the term's postings list
-		final int length = postings0.length;
-		for (int k = 1; k < length; k++) {
-			output.writeGamma(postings0[k] - postings0[k - 1]);	//write gap of document ids
-			output.writeUnary(postings1[k]);					//write term frequency
-			blockFrequency = postings3[k];							//number of block ids to write
-			output.writeUnary(blockFrequency);				//write block frequency
-			output.writeGamma(postings4[blockIndex]+1);		//write the first block id
-			blockIndex++;											//move to the next block id
-			for (int i=1; i<blockFrequency; i++) {
-				//write the gap between consequtive block ids
-				output.writeGamma(postings4[blockIndex]-postings4[blockIndex-1]);
-				blockIndex++;
-			}
-		}		
-	}
 }
 
Index: src/uk/ac/gla/terrier/compression/BitInSeekable.java
===================================================================
--- src/uk/ac/gla/terrier/compression/BitInSeekable.java	(revision 2526)
+++ src/uk/ac/gla/terrier/compression/BitInSeekable.java	(working copy)
@@ -52,4 +52,17 @@
 	 * @return Returns the BitIn object to use to read that data
 	 */	
 	public BitIn readReset(long startByteOffset, byte startBitOffset, long endByteOffset, byte endBitOffset) throws IOException;
+	
+	/**
+	 * Positions a reader at the given offset in the underlying file,
+	 * without specifying where the data to be read ends. After this
+	 * call, a sequence of read calls may follow. The offsets given
+	 * as arguments are inclusive. For example, if we call this method
+	 * with arguments 0, 2, reading will start from the third bit of
+	 * the first byte of the underlying file.
+	 * @param startByteOffset the starting byte to read from
+	 * @param startBitOffset the bit offset in the starting byte
+	 * @return Returns the BitIn object to use to read that data
+	 */
+	public BitIn readReset(long startByteOffset, byte startBitOffset) throws IOException;
 }
Index: src/uk/ac/gla/terrier/compression/BitFileBuffered.java
===================================================================
--- src/uk/ac/gla/terrier/compression/BitFileBuffered.java	(revision 0)
+++ src/uk/ac/gla/terrier/compression/BitFileBuffered.java	(revision 0)
@@ -0,0 +1,341 @@
+package uk.ac.gla.terrier.compression;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+import uk.ac.gla.terrier.utility.Files;
+import uk.ac.gla.terrier.utility.io.RandomDataInput;
+
+
+/** Implementation of BitInSeekable/BitIn interfaces similar to BitFile. However this
+  * class buffers only a small area of the posting list, to minimise large memory 
+  * allocations during retrieval. In contrast to BitFile, this class is read-only.
+  * @author Patrice Lacour
+  * @version $Revision: $
+  */
+public class BitFileBuffered implements BitInSeekable { 
+	/** how much of a file to buffer by default */	
+	protected static int DEFAULT_BUFFER_LENGTH = 8*1024;
+	/** The logger used */
+	protected static Logger logger = Logger.getRootLogger();
+	/** The underlying file */
+	protected RandomDataInput file;
+	/** how much of this file we will buffer */
+	protected final int buffer_size;
+	protected long fileSize;
+
+    /** 
+     * Constructs an instance of the class for a given file, using the default buffer size.
+     * I/O errors while opening the file are logged rather than thrown.
+     * @param file the underlying file
+     */	
+	public BitFileBuffered(File file) {
+		this(file, DEFAULT_BUFFER_LENGTH);
+	}
+
+    /** 
+     * Constructs an instance of the class for a given filename, using the default buffer size.
+     * I/O errors while opening the file are logged rather than thrown.
+     * @param filename java.lang.String the name of the underlying file
+     */
+	public BitFileBuffered(String filename) {
+		this(filename, DEFAULT_BUFFER_LENGTH);
+	}
+	
+    /** 
+     * Constructs an instance of the class for a given file. I/O errors are logged, not thrown.
+     * @param file the underlying file
+     * @param bufSize how much of the file to buffer
+     */
+	public BitFileBuffered(File file, int bufSize) {
+		buffer_size = bufSize;
+		try {
+			fileSize = Files.length(file.getPath()); // readReset(long, byte) needs this to clamp its buffer at end of file
+			this.file = Files.openFileRandom(file);
+		} catch (IOException ioe) {
+			logger.error("Input/Output exception while creating BitFileBuffered object.", ioe);
+		}	
+	}
+	
+	/** 
+	 * Constructs an instance of the class for a given filename.
+	 * I/O errors while opening the file are logged rather than thrown.
+	 * @param filename java.lang.String the name of the underlying file
+	 * @param bufSize how much of the file to buffer
+	 */
+	public BitFileBuffered(String filename, int bufSize) {
+		buffer_size = bufSize;
+		try {
+			fileSize = Files.length(filename);
+			file =  Files.openFileRandom(filename);
+		} catch (IOException ioe) {
+			logger.error("Input/Output exception while creating BitFileBuffered object.", ioe);
+		}	
+	}
+	
+	
+	/**
+	 * Reads from the file a specific number of bytes and after this
+	 * call, a sequence of read calls may follow. The offsets given 
+	 * as arguments are inclusive. For example, if we call this method
+	 * with arguments 0, 2, 1, 7, it will read in a buffer the contents 
+	 * of the underlying file from the third bit of the first byte to the 
+	 * last bit of the second byte.
+	 * @param startByteOffset the starting byte to read from
+	 * @param startBitOffset the bit offset in the starting byte
+	 * @param endByteOffset the ending byte 
+	 * @param endBitOffset the bit offset in the ending byte. 
+	 *        This bit is the last bit of this entry.
+	 * @return Returns the BitIn object to use to read that data
+	 */	
+	public BitIn readReset(long startByteOffset, byte startBitOffset, long endByteOffset, byte endBitOffset) {
+		final long range = endByteOffset - startByteOffset + (long)1;
+		return new BitInBuffered(file,startByteOffset,startBitOffset, range < buffer_size ? (int)range : buffer_size);
+	}
+	
+	
+	/**
+	 * Reads from the file from a specific offset. After this
+	 * call, a sequence of read calls may follow.
+	 * @param startByteOffset the starting byte to read from
+	 * @param startBitOffset the bit offset in the starting byte
+	 */
+	public BitIn readReset(long startByteOffset, byte startBitOffset) 
+	{
+		final long actualBufferSize = (startByteOffset + buffer_size) > fileSize 
+			? (fileSize - startByteOffset) 
+			: buffer_size;
+		return new BitInBuffered(file,startByteOffset,startBitOffset, (int)actualBufferSize);
+	}
+	
+	/** Closes the underlying file, if it was opened. An I/O error while closing is logged, not thrown. */
+	public void close()
+	{
+		try {
+			if (file != null) file.close(); // file stays null when the constructor failed to open it
+		} catch(IOException ioe) {
+			logger.error("Input/Output exception while closing a random access file. Stack trace follows", ioe);
+		}
+	}
+
+	
+	class BitInBuffered implements BitIn
+	{
+		private RandomDataInput parentFile;
+		private long offset;
+		private byte[] inBuffer;
+		private int size;
+		private int readByteOffset;
+		private int bitOffset;
+	
+		public BitInBuffered(RandomDataInput file, long startByteOffset, byte bitOffset, int bufLength)
+		{
+			try{
+				this.offset = startByteOffset;
+				this.bitOffset= bitOffset;
+				this.parentFile = file;
+				this.size = bufLength;
+				parentFile.seek(startByteOffset);
+				inBuffer = new byte[size];
+				parentFile.readFully(inBuffer);
+				readByteOffset = 0;
+			}catch(IOException ioe){
+				logger.error("Input/Output exception while reading from a random access file. Stack trace follows", ioe);
+			}
+		}
+		
+		
+		/* algorithm in this class: 
+			for every byte read	
+				if we exceed current buffer
+					seek parentFile if needed
+					read (size) more from parentFile
+				end if
+			for a skip
+				if skip exceed current buffer
+					seek parent file to end of skip
+					read (size) more from parentFile
+				end if
+		*/
+		
+		
+		
+		private void incrByte()
+		{
+			try{		
+				readByteOffset++;
+				offset++;
+				if(readByteOffset == size)
+				{
+					
+					readByteOffset=0;
+					inBuffer = new byte[size];
+					parentFile.seek(offset);
+					//logger.info("Reading 1024 bytes. pos="+parentFile.getFilePointer());
+					try{
+						parentFile.readFully(inBuffer);
+					} catch (EOFException eofe) { /* ignore this */}
+				}
+			}catch(IOException ioe){
+				logger.error("Input/Output exception while reading from a random access file. Stack trace follows", ioe);
+			}
+		}
+	
+	
+	
+		private void incrByte(int i)
+		{
+			try{
+				//System.out.println("skypping");
+				offset += i;
+				readByteOffset+=i;
+				if( readByteOffset >= size ) // we go to the next block  -- we skip only the begin of the block
+				{
+					parentFile.seek(offset); // we skip the first bytes of the next block
+					inBuffer = new byte[size];
+					readByteOffset = 0;
+					//logger.info("Reading 1024 bytes. pos="+parentFile.getFilePointer());
+					try{
+						parentFile.readFully(inBuffer);
+					} catch (EOFException eofe) { /* ignore this */}
+				}
+				
+			}catch(IOException ioe){
+				logger.error("Input/Output exception while reading from a random access file. Stack trace follows", ioe);
+			}
+		}
+		
+		/**
+	 	* Reads a gamma encoded integer from the underlying stream.
+	 	* I/O errors while refilling the buffer are logged, not thrown.
+	 	* @return the number read
+	 	*/
+		public int readGamma()  {
+			int u = readUnary() - 1;		
+			return (1 << u) + readBinary(u) ;
+		}
+		
+
+		/**
+	 	* Reads a unary encoded integer from the underlying stream.
+	 	* I/O errors while refilling the buffer are logged, not thrown.
+	 	* @return the number read
+	 	*/
+		public int readUnary() {
+			int x;
+			final int leftA = (inBuffer[readByteOffset] << bitOffset) & 0x00FF;		
+			if(leftA != 0){			
+				x = 8 - BitUtilities.MSB_BYTES[ leftA ];
+				bitOffset += x ;
+				readIn();
+				return x;
+			}
+			x = 8 - bitOffset;
+			incrByte();
+			while( (inBuffer[readByteOffset]== 0 )) {
+				x += 8;
+				incrByte();
+			}
+			x += (bitOffset =  8 -  BitUtilities.MSB_BYTES[ inBuffer[readByteOffset] & 0x00FF] );
+			readIn();
+			return x;		
+		}
+	
+		/**
+		 * Moves to the next byte if all 8 bits of the current one have been consumed.
+		 * I/O errors while refilling the buffer are logged, not thrown.
+		 */
+		protected void readIn(){
+			if(bitOffset == 8){					
+				bitOffset = 0;
+				incrByte();	 						
+			}
+		}
+
+
+    	/**
+     	* Aligns the stream to the next byte; does nothing if already byte-aligned.
+    	 * I/O errors while refilling the buffer are logged, not thrown.
+    	 */
+   	 public void align(){
+     	   if ( ( bitOffset & 7 ) == 0 ) return;
+     	   bitOffset = 0;
+    	    incrByte();
+   	 }
+	
+		/**
+	 	* Reads a binary integer from the buffer, refilling it from the file as needed.
+	 	* I/O errors while refilling the buffer are logged, not thrown.
+	 	* @param len is the number of binary bits to read
+		 * @return the decoded integer
+		 */
+	public int readBinary(int len) {
+		if(8 - bitOffset > len){					
+			int b = ( ((inBuffer[readByteOffset] << bitOffset) & 0x00FF)) >>> (8-len) ;
+			bitOffset += len;
+			return b;
+		}
+		
+		int x = inBuffer[readByteOffset] & ( ~ (0xFF << (8-bitOffset) )) &0xFF;
+		len +=  bitOffset - 8;
+		int i = len >> 3;
+		while(i-- != 0){			
+			incrByte();
+			x = x << 8 | (inBuffer[readByteOffset] & 0xFF); 
+		}		
+		incrByte();
+		bitOffset = len & 7;	
+		return (x << bitOffset) | ((inBuffer[readByteOffset] & 0xFF) >>> (8-bitOffset)) ;
+	}
+
+		/** Skip a number of bits in the current input stream
+	 	* @param len The number of bits to skip
+	 	*/
+    	public void skipBits(int len)
+    	{
+    		if(8 - bitOffset > len){
+				bitOffset += len;	
+				return;
+			}
+			len +=  bitOffset - 8;
+   	   final int i = len >> 3;
+   	   if (i > 0)
+   	   {
+   	      incrByte(i);
+   	   }
+			incrByte();
+			bitOffset = len & 7;
+		}
+		
+	public long getByteOffset(){ return offset;}
+	/**
+	 * Returns the bit offset within the current byte.
+	 * It corresponds to the position from which
+	 * the next bit will be read, as this class
+	 * is read-only.
+	 * @return the bit offset in the stream.
+	 */
+	public byte getBitOffset(){return (byte) bitOffset;}
+	
+	
+	/**
+	 * Does nothing: the underlying file is shared with the enclosing
+	 * BitFileBuffered object and is closed by BitFileBuffered.close().
+	 */
+	
+	public void close(){
+		/*try{
+			file.close();
+		}catch(IOException ioe){
+			logger.error("Input/Output exception while closing BitFile object.", ioe);
+		}*/
+	}
+		
+		
+		
+   }	
+}
+	
Index: src/uk/ac/gla/terrier/compression/BitFileInMemory.java
===================================================================
--- src/uk/ac/gla/terrier/compression/BitFileInMemory.java	(revision 2526)
+++ src/uk/ac/gla/terrier/compression/BitFileInMemory.java	(working copy)
@@ -95,6 +95,11 @@
 		return new BitInReader(startByteOffset, startBitOffset, endByteOffset, endBitOffset);
 	}
 
+	public BitIn readReset(long startByteOffset, byte startBitOffset) 
+	{
+		return new BitInReader(startByteOffset, startBitOffset);
+	}
+
 	/** Close this object. Does nothing. */
 	public void close()
 	{
@@ -107,12 +112,16 @@
     	protected int bitOffset;
 		protected int readByteOffset;
 		
-		public BitInReader(long startByteOffset, byte startBitOffset, long endByteOffset, byte endBitOffset)
+		public BitInReader(long startByteOffset, byte startBitOffset)
 		{
-			
 			readByteOffset = (int)startByteOffset;
 			bitOffset = startBitOffset;
 		}
+		
+		public BitInReader(long startByteOffset, byte startBitOffset, long endByteOffset, byte endBitOffset)
+		{
+			this(startByteOffset, startBitOffset);
+		}
 		/**
 		* Returns the byte offset of the stream.
 		* It corresponds to the position of the
Index: src/uk/ac/gla/terrier/compression/BitFile.java
===================================================================
--- src/uk/ac/gla/terrier/compression/BitFile.java	(revision 2526)
+++ src/uk/ac/gla/terrier/compression/BitFile.java	(working copy)
@@ -316,6 +316,11 @@
 		return this;
 	}
 	
+
+	public BitIn readReset(long startByteOffset, byte startBitOffset) throws IOException {
+		throw new IOException("Unsupported");
+	}
+	
 	/**
 	 * Reads a gamma encoded integer from the underlying stream
 	 * @return the number read
@@ -635,4 +640,5 @@
 		if(b > 0 ) return readMinimalBinary(b);
 		else return 0;
 	}
+
 }
Index: src/uk/ac/gla/terrier/applications/TRECLMIndexing.java
===================================================================
--- src/uk/ac/gla/terrier/applications/TRECLMIndexing.java	(revision 2526)
+++ src/uk/ac/gla/terrier/applications/TRECLMIndexing.java	(working copy)
@@ -69,11 +69,15 @@
 			return;
 		}
 	
+		try{
 		CreateTermEstimateIndex teIndex = new CreateTermEstimateIndex(index, modelName);
 		teIndex.createTermEstimateIndex();
 		
 		CreateDocumentInitialWeightIndex docWIndex = new CreateDocumentInitialWeightIndex(index, modelName);
 		docWIndex.createDocumentInitialWeightIndex();
+		} catch (Exception e) {
+			logger.error("Could not make LM structures", e);
+		}
 	}
 	
 	/** 
Index: src/uk/ac/gla/terrier/matching/Matching.java
===================================================================
--- src/uk/ac/gla/terrier/matching/Matching.java	(revision 2526)
+++ src/uk/ac/gla/terrier/matching/Matching.java	(working copy)
@@ -114,7 +114,7 @@
 	/** The document index used.*/
 	protected DocumentIndex docIndex;
 	/** The lexicon used.*/
-	protected Lexicon lexicon;
+	protected Lexicon<String> lexicon;
 	/** The inverted file.*/
 	protected InvertedIndex invertedIndex;
 	/** The collection statistics */
@@ -334,17 +334,18 @@
 			//the TermCodes class, the assigned term code is only valid during the indexing
 			//process. Therefore, at this point, the term code should be updated with the one
 			//stored in the lexicon file.	
-			queryTerms.setTermProperty(queryTermStrings[i], lEntry.termId);
+			queryTerms.setTermProperty(queryTermStrings[i], lEntry);
 			//the weighting model is prepared for assigning scores to documents
 			wmodel.setKeyFrequency(queryTerms.getTermWeight(queryTermStrings[i]));
-			wmodel.setDocumentFrequency((double)lEntry.n_t);
-			wmodel.setTermFrequency((double)lEntry.TF);
+			wmodel.setDocumentFrequency((double)lEntry.getDocumentFrequency());
+			wmodel.setTermFrequency((double)lEntry.getFrequency());
 			
-			logger.debug((i + 1) + ": " + queryTermStrings[i].trim() + " with " + lEntry.n_t + " documents (TF is " + lEntry.TF + ").");
+			logger.debug((i + 1) + ": " + queryTermStrings[i].trim() + " with " + lEntry.getDocumentFrequency() 
+					+ " documents (TF is " + lEntry.getFrequency() + ").");
 
 
 			//check if the IDF is very low.
-			if (IGNORE_LOW_IDF_TERMS && docIndex.getNumberOfDocuments() < lEntry.TF) {
+			if (IGNORE_LOW_IDF_TERMS && docIndex.getNumberOfDocuments() < lEntry.getFrequency()) {
 				logger.debug("query term " + queryTermStrings[i] + " has low idf - ignored from scoring.");
 				continue;
 			}
Index: src/uk/ac/gla/terrier/matching/LMMatching.java
===================================================================
--- src/uk/ac/gla/terrier/matching/LMMatching.java	(revision 2526)
+++ src/uk/ac/gla/terrier/matching/LMMatching.java	(working copy)
@@ -32,6 +32,7 @@
 import uk.ac.gla.terrier.matching.models.languagemodel.LanguageModel;
 import uk.ac.gla.terrier.matching.tsms.TermScoreModifier;
 import uk.ac.gla.terrier.structures.Index;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.structures.indexing.DocumentInitialWeightIndex;
 import uk.ac.gla.terrier.structures.indexing.TermEstimateIndex;
 import uk.ac.gla.terrier.utility.HeapSort;
@@ -165,10 +166,10 @@
 		final int queryLength = queryTermStrings.length;
 		for (int i = 0; i < queryLength; i++) {
 			//we seek the query term in the lexicon
-			boolean found = lexicon.findTerm(queryTermStrings[i]);
-			//and if it is not found, we continue with the next term
-			if (!found)
+			LexiconEntry le = lexicon.getLexiconEntry(queryTermStrings[i]);
+			if (le == null)
 				continue;
+			
 			//because when the TreeNode is created, the term
 			//code assigned is taken from
 			//the TermCodes class, the assigned term code is
@@ -176,31 +177,31 @@
 			//process. Therefore, at this point, the term
 			//code should be updated with the one
 			//stored in the lexicon file.	
-			queryTerms.setTermProperty(queryTermStrings[i], lexicon.getTermId());
+			queryTerms.setTermProperty(queryTermStrings[i], le.getTermId());
 			if(logger.isDebugEnabled()){
-				logger.debug("" + (i + 1) + ": " + queryTermStrings[i].trim() + "(" + lexicon.getTermId() + ")");
+				logger.debug("" + (i + 1) + ": " + queryTermStrings[i].trim() + "(" + le.getTermId() + ")");
 			}
 			//the weighting model is prepared for assigning scores to documents
-			wmodel.setTermFrequency((double)lexicon.getTF());
-			this.termFrequency[i] = (double)lexicon.getTF();
-			this.termEstimates[i] = this.termEstimateIndex.getTermEstimateByTermid(lexicon.getTermId());
+			wmodel.setTermFrequency((double)le.getFrequency());
+			this.termFrequency[i] = (double)le.getFrequency();
+			this.termEstimates[i] = this.termEstimateIndex.getTermEstimateByTermid(le.getTermId());
 			if(logger.isDebugEnabled()){
 				logger.debug(
 					" with "
-						+ lexicon.getNt()
+						+ le.getDocumentFrequency()
 						+ " documents (TF is "
-						+ lexicon.getTF()
+						+ le.getFrequency()
 						+ ").");
 			}
 			//check if the IDF is very low.
 			if(logger.isInfoEnabled()){
-				if (IGNORE_LOW_IDF_TERMS==true && docIndex.getNumberOfDocuments() < lexicon.getTF()) {
+				if (IGNORE_LOW_IDF_TERMS==true && docIndex.getNumberOfDocuments() < le.getFrequency()) {
 					logger.info("query term " + queryTermStrings[i] + " has low idf - ignored from scoring.");
 					continue;
 				}
 			}
 			//the postings are beign read from the inverted file.
-			pointers = invertedIndex.getDocuments(queryTerms.getTermCode(queryTermStrings[i]));
+			pointers = invertedIndex.getDocuments(le);
 			
 			init_tf(i, pointers);
 			
Index: src/uk/ac/gla/terrier/matching/dsms/PhraseScoreModifier.java
===================================================================
--- src/uk/ac/gla/terrier/matching/dsms/PhraseScoreModifier.java	(revision 2526)
+++ src/uk/ac/gla/terrier/matching/dsms/PhraseScoreModifier.java	(working copy)
@@ -26,22 +26,22 @@
  */
 package uk.ac.gla.terrier.matching.dsms;
 
-import gnu.trove.TIntArrayList;
 import gnu.trove.TIntIntHashMap;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 
+import org.apache.log4j.Logger;
+
 import uk.ac.gla.terrier.matching.MatchingQueryTerms;
 import uk.ac.gla.terrier.matching.ResultSet;
 import uk.ac.gla.terrier.querying.parser.SingleTermQuery;
 import uk.ac.gla.terrier.structures.BlockInvertedIndex;
 import uk.ac.gla.terrier.structures.Index;
 import uk.ac.gla.terrier.structures.InvertedIndex;
+import uk.ac.gla.terrier.structures.LexiconEntry;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 
-import org.apache.log4j.Logger;
-
 /**
  * Modifies the scores of the documents which contain, or do not contain a given
  * phrase.
@@ -186,14 +186,15 @@
 		for (int i = 0; i < phraseLength; i++) {
 			docidsMap[i] = new TIntIntHashMap();
 			String t = ((SingleTermQuery) phraseTerms.get(i)).getTerm();
-			if (terms.getTermCode(t) == -1) {
-				index.getLexicon().findTerm(t);
-				int termCode = index.getLexicon().getTermId();
-				terms.setTermProperty(t, termCode);
+			if (terms.getStatistics(t) == null)
+			{
+				LexiconEntry le = index.getLexicon().getLexiconEntry(t);
+				if (le == null)
+					continue;
+				terms.setTermProperty(t, le);
 			}
 
-			int termCode = terms.getTermCode(t);
-			if (termCode != -1) {
+			
 				//for each phrase term, we store the identifiers of
 				//documents that contain that term in a hashmap
 				//we also convert the block frequencies into
@@ -204,7 +205,7 @@
 				//For j-th document in the postings lists postings[i]
 				//the positions start at postings[i][4][postings[i][3][j-1]]
 				//and end at postings[i][4][postings[i][3][j]-1]
-				postings[i] = invIndex.getDocuments(terms.getTermCode(t));
+			postings[i] = invIndex.getDocuments((LexiconEntry)terms.getStatistics(t));
 
 				for (int j = 0; j < postings[i][0].length; j++) {
 					//note that the entries in the docidsMap hash sets have
@@ -213,7 +214,6 @@
 					if (j > 0)
 						postings[i][3][j] += postings[i][3][j - 1];
 				}
-			}
 
 		}
 		try {
Index: src/uk/ac/gla/terrier/matching/dsms/BlockScoreModifier.java
===================================================================
--- src/uk/ac/gla/terrier/matching/dsms/BlockScoreModifier.java	(revision 2526)
+++ src/uk/ac/gla/terrier/matching/dsms/BlockScoreModifier.java	(working copy)
@@ -65,7 +65,7 @@
 		if (invertedIndex instanceof BlockInvertedIndex && 
 				query.length() > 1 && query.length() < 5) {
 			
-			Lexicon lexicon = index.getLexicon();
+			Lexicon<String> lexicon = index.getLexicon();
 			
 			int[] docids = resultSet.getDocids();
 			double[] scores = resultSet.getScores();
@@ -118,7 +118,7 @@
 					continue;
 				//double term1KeyFrequency = query.getTermWeight(term1);
 				
-				double term1DocumentFrequency = (double)tEntry1.n_t;
+				double term1DocumentFrequency = (double)tEntry1.getDocumentFrequency();
 				
 				//we seek the 2nd query term in the lexicon
 				LexiconEntry tEntry2 = lexicon.getLexiconEntry(term2);
@@ -126,7 +126,7 @@
 				if (tEntry1 == null)
 					continue;
 				//double term2KeyFrequency = query.getTermWeight(term2);
-				double term2DocumentFrequency = (double)tEntry2.n_t;
+				double term2DocumentFrequency = (double)tEntry2.getDocumentFrequency();
 				term1Pointers = invertedIndex.getDocuments(tEntry1);
 				
 				term1docids = term1Pointers[0];
Index: src/uk/ac/gla/terrier/matching/MatchingQueryTerms.java
===================================================================
--- src/uk/ac/gla/terrier/matching/MatchingQueryTerms.java	(revision 2526)
+++ src/uk/ac/gla/terrier/matching/MatchingQueryTerms.java	(working copy)
@@ -34,6 +34,7 @@
 import uk.ac.gla.terrier.matching.dsms.DocumentScoreModifier;
 import uk.ac.gla.terrier.matching.tsms.TermScoreModifier;
 import uk.ac.gla.terrier.querying.parser.Query;
+import uk.ac.gla.terrier.structures.EntryStatistics;
 /**
  * Models a query used for matching documents. It is created
  * by creating an instance of this class, and then passing it as
@@ -44,11 +45,13 @@
  * @author Vassilis Plachouras, Craig Macdonald.
  * @version $Revision: 1.24 $
  */
-public class MatchingQueryTerms implements Serializable,Cloneable{
-		
+public class MatchingQueryTerms implements Serializable,Cloneable
+{
+	private static final long serialVersionUID = -9134975387300425203L;
 	/** The weight and the modifiers associated with a query term.*/
-	protected static class QueryTermProperties implements Serializable{
-		
+	protected static class QueryTermProperties implements Serializable
+	{
+		private static final long serialVersionUID = 6327392687128896557L;
 		
 		/** The weight of a query term. This is usually how many times the term occurred
 		  * in the query, but sometime may be altered if a weight has been specified on the
@@ -56,8 +59,8 @@
 		  * on the unparsed query (example <tt>term1 term2^3</tt>). */
 		double weight;
 		
-		/** The term code (identifier) of the query term.*/
-		int termCode;
+		/** Info about the query term.*/
+		EntryStatistics stats;
 		
 		/** The term score modifiers associated with a particular query term.*/
 		ArrayList<TermScoreModifier> modifiers = new ArrayList<TermScoreModifier>();
@@ -70,8 +73,8 @@
 		 * of a query term.
 		 * @param code int the term code of a query term. 
 		 */
-		public QueryTermProperties(int code) {
-			termCode = code;
+		public QueryTermProperties(EntryStatistics _stats) {
+			stats = _stats;
 		}
 		
 		/** 
@@ -106,9 +109,9 @@
 		 * @param w double the weight of a query term. 
 		 * @param code int the term code of a query term. 
 		 */
-		public QueryTermProperties(double w, int code) {
+		public QueryTermProperties(double w, EntryStatistics _stats) {
 			weight = w;
-			termCode = code;
+			stats = _stats;
 		}
 		
 		/**
@@ -117,9 +120,9 @@
 		 * @param tsm TermScoreModifier the modifier associated with a query term.
 		 * @param code int the term code of a query term. 
 		 */
-		public QueryTermProperties(TermScoreModifier tsm, int code) {
+		public QueryTermProperties(TermScoreModifier tsm, EntryStatistics _stats) {
 			modifiers.add(tsm);
-			termCode = code;
+			stats = _stats;
 		}
 		
 		/**
@@ -128,15 +131,15 @@
 		 * @param tsm TermScoreModifier the modifier associated with a query term.
 		 * @param code int the term code of a query term. 
 		 */
-		public QueryTermProperties(double w, TermScoreModifier tsm, int code) {
+		public QueryTermProperties(double w, TermScoreModifier tsm, EntryStatistics _stats) {
 			weight = w;
 			modifiers.add(tsm);
-			termCode = code;
+			stats = _stats;
 		}
 
 		public Object clone()
 		{
-			QueryTermProperties newO = new QueryTermProperties(weight, termCode);
+			QueryTermProperties newO = new QueryTermProperties(weight, stats);
 			for (TermScoreModifier tsm : modifiers)
 				newO.modifiers.add((TermScoreModifier)(tsm.clone()));
 			return (Object)newO;
@@ -144,7 +147,7 @@
 
 		public int hashCode()
 		{
-			int hashCodeValue = termCode;
+			int hashCodeValue = stats.hashCode();
 			hashCodeValue += (new Double(weight)).hashCode();
 			for (TermScoreModifier tsm : modifiers)
 			{
@@ -275,16 +278,16 @@
 	}
 	
 	/**
-	 * Sets the term integer identifier for the given query term.
+	 * Sets the term statistics for the given query term.
-	 * @param term String the term for which the term identifier is set.
-	 * @param code int the term identifier.
+	 * @param term String the term for which the statistics are set.
+	 * @param stats EntryStatistics the statistics of the term.
 	 */
-	public void setTermProperty(String term, int code) {
+	public void setTermProperty(String term, EntryStatistics stats) {
 		QueryTermProperties properties = termProperties.get(term);
 		if (properties == null) {
-			termProperties.put(term, new QueryTermProperties(code));
+			termProperties.put(term, new QueryTermProperties(stats));
 		} else {
-			properties.termCode = code;
+			properties.stats = stats;
 		}
 	}
 	
@@ -364,11 +367,9 @@
 	 * @return int the term code of the given query term, or -1 if the term
 	 *         does not appear in the query.
 	 */
-	public int getTermCode(String term) {
-		QueryTermProperties tp = (QueryTermProperties)termProperties.get(term);
-		if (tp!=null)
-			return tp.termCode;
-		return -1;
+	public EntryStatistics getStatistics(String term) {
+		QueryTermProperties tp = termProperties.get(term);
+		return tp.stats;
 	}
 	
 	/** 
@@ -379,7 +380,7 @@
 	 *         of the query. 
 	 */
 	public TermScoreModifier[] getTermScoreModifiers(String term) {
-		QueryTermProperties tp = (QueryTermProperties)termProperties.get(term);
+		QueryTermProperties tp = termProperties.get(term);
 		if (tp!=null)
 			return (TermScoreModifier[])tp.modifiers.toArray(tmpTSM);
 		return null;
Index: src/TrecTerrier.java
===================================================================
--- src/TrecTerrier.java	(revision 2526)
+++ src/TrecTerrier.java	(working copy)
@@ -26,6 +26,7 @@
 import java.io.File;
 
 import org.apache.log4j.Logger;
+
 import uk.ac.gla.terrier.applications.HadoopIndexing;
 import uk.ac.gla.terrier.applications.TRECIndexing;
 import uk.ac.gla.terrier.applications.TRECLMIndexing;
@@ -37,10 +38,9 @@
 import uk.ac.gla.terrier.evaluation.NamedPageEvaluation;
 import uk.ac.gla.terrier.structures.DirectIndexInputStream;
 import uk.ac.gla.terrier.structures.DocumentIndexInputStream;
-import uk.ac.gla.terrier.structures.InvertedIndexInputStream;
-import uk.ac.gla.terrier.structures.InvertedIndex;
 import uk.ac.gla.terrier.structures.Index;
-import uk.ac.gla.terrier.structures.LexiconInputStream;
+import uk.ac.gla.terrier.structures.InvertedIndexInputStream;
+import uk.ac.gla.terrier.structures.LexiconUtil;
 import uk.ac.gla.terrier.utility.ApplicationSetup;
 import uk.ac.gla.terrier.utility.Files;
 /**
@@ -330,12 +330,6 @@
 		if (printdocid && !Files.exists(ApplicationSetup.DOCUMENT_INDEX_FILENAME))
 			return ERROR_PRINT_DOCINDEX_FILE_NOT_EXISTS;
 		
-		if (printlexicon && !Files.exists(ApplicationSetup.LEXICON_FILENAME))
-			return ERROR_PRINT_LEXICON_FILE_NOT_EXISTS;
-		
-		if (printinverted && !Files.exists(ApplicationSetup.INVERTED_FILENAME))
-			return ERROR_PRINT_INVERTED_FILE_NOT_EXISTS;
-		
 		if (printdirect && !Files.exists(ApplicationSetup.DIRECT_FILENAME))
 			return ERROR_PRINT_DIRECT_FILE_NOT_EXISTS;
 		
@@ -425,33 +419,31 @@
 			i.close();
 		} else if (printlexicon) {
 			Index i = Index.createIndex();
-			LexiconInputStream lex = (LexiconInputStream)(i.getIndexStructureInputStream("lexicon"));
-			lex.print();
-			lex.close();
-			i.close();
+			LexiconUtil.printLexicon(i, "lexicon");
 		} else if (printdirect) {
 			Index i = Index.createIndex();
+			if (! i.hasIndexStructureInputStream("direct"))
+			{
+				logger.warn("Sorry, no direct index structure in index");
+			}
+			else
+			{
 			DirectIndexInputStream dirIndex = (DirectIndexInputStream)(i.getIndexStructureInputStream("direct"));
 			dirIndex.print();
 			dirIndex.close();
+			} // end the else here so the index is closed on both paths
 			i.close();
 		} else if (printinverted) {
 			Index i = Index.createIndex();
-			if (i.hasIndexStructureInputStream("inverted"))//some dont yet have appropriate input stream implementations
+			if (i.hasIndexStructureInputStream("inverted"))
 			{
 				InvertedIndexInputStream invIndex = (InvertedIndexInputStream)(i.getIndexStructureInputStream("inverted"));
 				invIndex.print();
 				invIndex.close();
 			}
-			else if (i.hasIndexStructure("inverted"))
-			{
-				InvertedIndex invIndex = (InvertedIndex)i.getIndexStructure("inverted");
-				invIndex.print();
-				invIndex.close();
-			}
 			else
 			{
-				logger.warn("Sorry, no inverted index structure in index");
+				logger.warn("Sorry, no inverted index inputstream structure in index");
 			}
 			i.close();
 		} else if (printstats) {
@@ -527,12 +519,6 @@
 			case ERROR_PRINT_DOCINDEX_FILE_NOT_EXISTS :
 				System.err.println("The specified document index file does not exist.");
 				break;
-			case ERROR_PRINT_LEXICON_FILE_NOT_EXISTS : 
-				System.err.println("The specified lexicon file ("+ApplicationSetup.LEXICON_FILENAME+") does not exist.");
-				break;
-			case ERROR_PRINT_INVERTED_FILE_NOT_EXISTS : 
-				System.err.println("The specified inverted index does not exist.");
-				break;
 			case ERROR_PRINT_DIRECT_FILE_NOT_EXISTS : 
 				System.err.println("The specified direct index does not exist.");
 				break;
@@ -571,7 +557,6 @@
 	protected static final int ERROR_NO_C_VALUE = 2;
 	protected static final int ERROR_CONFLICTING_ARGUMENTS = 3;
 	protected static final int ERROR_DIRECT_FILE_EXISTS = 4;
-	protected static final int ERROR_INVERTED_FILE_EXISTS = 5;
 	protected static final int ERROR_DIRECT_FILE_NOT_EXISTS = 6;
 	protected static final int ERROR_PRINT_DOCINDEX_FILE_NOT_EXISTS = 7;
 	protected static final int ERROR_PRINT_LEXICON_FILE_NOT_EXISTS = 8;

