Terrier Core

FSOrderedMapFile.EntryIterator.skip() breaks FSOrderedMapFile.EntryIterator.hasNext()

Details

  • Type: Bug
  • Status: Resolved
  • Priority: Trivial
  • Resolution: Fixed
  • Affects Version/s: 3.0
  • Fix Version/s: 3.0
  • Component/s: .structures
  • Description:
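    After a call to FSOrderedMapFile.EntryIterator.skip(), FSOrderedMapFile.EntryIterator.hasNext() no longer behaves correctly: skip() skips the bytes in the underlying stream but does not advance the iterator's entry counter (the patch below adds the missing "counter += numEntries").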

    This seems to affect only a corner case in Inverted2DirectMR.

Activity

Hide
Craig Macdonald added a comment - 20/Oct/09 10:49 AM

Here's the patch, including test case:

Index: src/uk/ac/gla/terrier/structures/collections/FSOrderedMapFile.java
===================================================================
--- src/uk/ac/gla/terrier/structures/collections/FSOrderedMapFile.java	(revision 2750)
+++ src/uk/ac/gla/terrier/structures/collections/FSOrderedMapFile.java	(working copy)
@@ -191,6 +191,7 @@
 				di.skipBytes(toSkip);
 				actualSkipped += (long)toSkip;
 			}
+			counter += numEntries;
 		}
     }
     
Index: src_test/uk/ac/gla/terrier/structures/collections/TestFSOrderedMapFile.java
===================================================================
--- src_test/uk/ac/gla/terrier/structures/collections/TestFSOrderedMapFile.java	(revision 2694)
+++ src_test/uk/ac/gla/terrier/structures/collections/TestFSOrderedMapFile.java	(working copy)
@@ -12,6 +12,7 @@
 import org.apache.hadoop.io.Text;
 import org.junit.Test;
 
+import uk.ac.gla.terrier.structures.Skipable;
 import uk.ac.gla.terrier.structures.collections.FSOrderedMapFile.MapFileWriter;
 import uk.ac.gla.terrier.structures.seralization.FixedSizeIntWritableFactory;
 import uk.ac.gla.terrier.structures.seralization.FixedSizeTextFactory;
@@ -94,6 +95,23 @@
 		assertTrue(copyKey2Id.size() == 0);
 	}
 	
+	protected void readStreamSkip(Iterator<Map.Entry<Text, IntWritable>> iterator, int totalNumEntries) throws Exception
+	{
+		int skip = 3;
+		int entryIndex = 0;
+		((Skipable)iterator).skip(skip);
+		entryIndex += skip;
+		while(iterator.hasNext())
+		{
+			Map.Entry<Text, IntWritable> e = iterator.next();
+			assertNotNull(e);
+			assertNotNull(e.getKey());
+			assertEquals(testKeys[entryIndex], e.getKey().toString());
+			entryIndex++;
+		}
+		assertEquals(testKeys.length, entryIndex);
+	}
+	
 	@Test public void testStream() throws Exception
 	{
 		FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
@@ -103,6 +121,15 @@
 		inputStream.close();
 	}
 	
+	@Test public void testStreamSkip() throws Exception
+	{
+		FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
+		FSOrderedMapFile.EntryIterator<Text, IntWritable> inputStream = new FSOrderedMapFile.EntryIterator<Text, IntWritable>(
+				file, keyFactory, new FixedSizeIntWritableFactory());
+		readStreamSkip(inputStream, testKeys.length);
+		inputStream.close();
+	}
+	
 	@Test public void testOnDisk() throws Exception
 	{
 		FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
Show
Craig Macdonald added a comment - 20/Oct/09 10:49 AM Here's the patch, including test case:
Index: src/uk/ac/gla/terrier/structures/collections/FSOrderedMapFile.java
===================================================================
--- src/uk/ac/gla/terrier/structures/collections/FSOrderedMapFile.java	(revision 2750)
+++ src/uk/ac/gla/terrier/structures/collections/FSOrderedMapFile.java	(working copy)
@@ -191,6 +191,7 @@
 				di.skipBytes(toSkip);
 				actualSkipped += (long)toSkip;
 			}
+			counter += numEntries;
 		}
     }
     
Index: src_test/uk/ac/gla/terrier/structures/collections/TestFSOrderedMapFile.java
===================================================================
--- src_test/uk/ac/gla/terrier/structures/collections/TestFSOrderedMapFile.java	(revision 2694)
+++ src_test/uk/ac/gla/terrier/structures/collections/TestFSOrderedMapFile.java	(working copy)
@@ -12,6 +12,7 @@
 import org.apache.hadoop.io.Text;
 import org.junit.Test;
 
+import uk.ac.gla.terrier.structures.Skipable;
 import uk.ac.gla.terrier.structures.collections.FSOrderedMapFile.MapFileWriter;
 import uk.ac.gla.terrier.structures.seralization.FixedSizeIntWritableFactory;
 import uk.ac.gla.terrier.structures.seralization.FixedSizeTextFactory;
@@ -94,6 +95,23 @@
 		assertTrue(copyKey2Id.size() == 0);
 	}
 	
+	protected void readStreamSkip(Iterator<Map.Entry<Text, IntWritable>> iterator, int totalNumEntries) throws Exception
+	{
+		int skip = 3;
+		int entryIndex = 0;
+		((Skipable)iterator).skip(skip);
+		entryIndex += skip;
+		while(iterator.hasNext())
+		{
+			Map.Entry<Text, IntWritable> e = iterator.next();
+			assertNotNull(e);
+			assertNotNull(e.getKey());
+			assertEquals(testKeys[entryIndex], e.getKey().toString());
+			entryIndex++;
+		}
+		assertEquals(testKeys.length, entryIndex);
+	}
+	
 	@Test public void testStream() throws Exception
 	{
 		FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
@@ -103,6 +121,15 @@
 		inputStream.close();
 	}
 	
+	@Test public void testStreamSkip() throws Exception
+	{
+		FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
+		FSOrderedMapFile.EntryIterator<Text, IntWritable> inputStream = new FSOrderedMapFile.EntryIterator<Text, IntWritable>(
+				file, keyFactory, new FixedSizeIntWritableFactory());
+		readStreamSkip(inputStream, testKeys.length);
+		inputStream.close();
+	}
+	
 	@Test public void testOnDisk() throws Exception
 	{
 		FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
Hide
Craig Macdonald added a comment - 20/Oct/09 10:50 AM

Committed to trunk.

Show
Craig Macdonald added a comment - 20/Oct/09 10:50 AM Committed to trunk.

People

Dates

  • Created:
    20/Oct/09 10:48 AM
    Updated:
    05/Mar/10 5:15 PM
    Resolved:
    20/Oct/09 10:50 AM