Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
package com.twitter.algebird
package benchmark

import com.twitter.algebird.immutable.BitSet
import org.openjdk.jmh.annotations._

object BloomFilterDistanceBenchmark {

def toSparse[A](bf: BF[A]): BFSparse[A] = bf match {
case BFZero(hashes, width) => BFSparse(hashes, RichCBitSet(), width)
case BFItem(item, hashes, width) =>
BFSparse(hashes, RichCBitSet.fromArray(hashes(item)), width)
case bfs @ BFSparse(_, _, _) => bfs
case BFInstance(hashes, bitset, width) =>
BFSparse(hashes, RichCBitSet.fromBitSet(bitset), width)
}

def toDense[A](bf: BF[A]): BFInstance[A] = bf match {
case BFZero(hashes, width) => BFInstance.empty[A](hashes, width)
case BFItem(item, hashes, width) =>
val bs = LongBitSet.empty(width)
bs += hashes(item)
BFInstance(hashes, bs.toBitSetNoCopy, width)
case bfs @ BFSparse(_, _, _) => bfs.dense
val bs = hashes(item).foldLeft(BitSet.empty)(_ + _)
BFInstance(hashes, bs, width)
case bfi @ BFInstance(_, _, _) => bfi
}

Expand All @@ -39,15 +29,11 @@ object BloomFilterDistanceBenchmark {
BloomFilter[String](nbrOfElements, falsePositiveRate).zero

val sparseBF1: BF[String] =
toSparse(
BloomFilter[String](nbrOfElements, falsePositiveRate)
.create(randomElements: _*)
)
BloomFilter[String](nbrOfElements, falsePositiveRate)
.create(randomElements: _*)
val sparesBF2: BF[String] =
toSparse(
BloomFilter[String](nbrOfElements, falsePositiveRate)
.create(randomElements: _*)
)
BloomFilter[String](nbrOfElements, falsePositiveRate)
.create(randomElements: _*)

val denseBF1: BF[String] = toDense(
BloomFilter[String](nbrOfElements, falsePositiveRate)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.twitter.algebird
package benchmark

import com.twitter.algebird.legacy._
import org.openjdk.jmh.annotations._

import scala.util.Random

object BloomFilterLegacyCreateBenchmark {

  /**
   * Builds a list of random strings.
   *
   * @param nbrOfStrings    how many strings to generate
   * @param lengthOfStrings the length requested from `Random.nextString` for each string
   * @return `nbrOfStrings` freshly generated random strings
   */
  def createRandomString(nbrOfStrings: Int, lengthOfStrings: Int): Seq[String] =
    List.fill(nbrOfStrings) {
      Random.nextString(lengthOfStrings)
    }

  /**
   * Benchmark state holding the input strings used to build a Bloom filter.
   *
   * The two `@Param` fields are declared as `var`s with placeholder initial values;
   * `randomStrings` is populated by [[setup]].
   */
  @State(Scope.Benchmark)
  class BloomFilterState {

    /** Number of random strings to generate; also passed to the filter as its expected size. */
    @Param(Array("100", "1000", "10000"))
    var nbrOfElements: Int = 0

    /** False-positive rate handed to the Bloom filter constructor. */
    @Param(Array("0.001", "0.01"))
    var falsePositiveRate: Double = 0

    /** Input data for the benchmark, assigned in [[setup]]. */
    var randomStrings: Seq[String] = _

    /** Generates `nbrOfElements` random strings of length 10 (annotated to run at `Level.Trial`). */
    @Setup(Level.Trial)
    def setup(): Unit = {
      randomStrings = createRandomString(nbrOfElements, 10)
    }
  }
}

/** Benchmark that measures building a legacy Bloom filter from a batch of strings. */
class BloomFilterLegacyCreateBenchmark {

  import BloomFilterLegacyCreateBenchmark._

  /**
   * Creates a Bloom filter sized from the state's parameters and fills it with
   * the state's pre-generated random strings.
   *
   * @param bloomFilterState supplies the element count, false-positive rate and input strings
   * @return the populated filter, returned as the benchmark result
   */
  @Benchmark
  def createBloomFilter(bloomFilterState: BloomFilterState): BF[String] =
    BloomFilter[String](bloomFilterState.nbrOfElements, bloomFilterState.falsePositiveRate)
      .create(bloomFilterState.randomStrings: _*)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package com.twitter.algebird
package benchmark

import com.twitter.algebird.legacy._
import org.openjdk.jmh.annotations._

object BloomFilterLegacyDistanceBenchmark {

  /**
   * Re-expresses any of the four `BF` cases as a `BFSparse`.
   *
   * An input that is already a `BFSparse` is returned unchanged.
   */
  def toSparse[A](bf: BF[A]): BFSparse[A] =
    bf match {
      case BFZero(hashes, width) =>
        BFSparse(hashes, RichCBitSet(), width)
      case BFItem(item, hashes, width) =>
        val hashedPositions = hashes(item)
        BFSparse(hashes, RichCBitSet.fromArray(hashedPositions), width)
      case alreadySparse @ BFSparse(_, _, _) =>
        alreadySparse
      case BFInstance(hashes, bitset, width) =>
        BFSparse(hashes, RichCBitSet.fromBitSet(bitset), width)
    }

  /**
   * Re-expresses any of the four `BF` cases as a `BFInstance`.
   *
   * An input that is already a `BFInstance` is returned unchanged.
   */
  def toDense[A](bf: BF[A]): BFInstance[A] =
    bf match {
      case BFZero(hashes, width) =>
        BFInstance.empty[A](hashes, width)
      case BFItem(item, hashes, width) =>
        // Start from an empty bit set of the filter's width and add the item's hash values.
        val bits = LongBitSet.empty(width)
        bits += hashes(item)
        BFInstance(hashes, bits.toBitSetNoCopy, width)
      case sparse @ BFSparse(_, _, _) =>
        sparse.dense
      case alreadyDense @ BFInstance(_, _, _) =>
        alreadyDense
    }

  /**
   * Benchmark state: pairs of empty, sparse and dense filters, all built with
   * the same element count and false-positive rate.
   */
  @State(Scope.Benchmark)
  class BloomFilterState {

    val nbrOfElements: Int = 1000
    val falsePositiveRate = 0.01

    /** A `def`, so every call asks for a new batch of random strings. */
    def randomElements =
      BloomFilterCreateBenchmark.createRandomString(nbrOfElements, 10)

    /** The zero filter for the configured size and false-positive rate. */
    private def emptyFilter: BF[String] =
      BloomFilter[String](nbrOfElements, falsePositiveRate).zero

    /** A filter created from one call to `randomElements`. */
    private def populatedFilter: BF[String] =
      BloomFilter[String](nbrOfElements, falsePositiveRate)
        .create(randomElements: _*)

    val emptyBF1: BF[String] = emptyFilter
    val emptyBF2: BF[String] = emptyFilter

    val sparseBF1: BF[String] = toSparse(populatedFilter)
    val sparesBF2: BF[String] = toSparse(populatedFilter)

    val denseBF1: BF[String] = toDense(populatedFilter)
    val denseBF2: BF[String] = toDense(populatedFilter)

  }
}

/**
 * Benchmarks `hammingDistance` between the empty, sparse and dense filters
 * held by `BloomFilterState`, one benchmark per pairing.
 */
class BloomFilterLegacyDistanceBenchmark {

  import BloomFilterLegacyDistanceBenchmark._

  /** Distance between the two empty filters. */
  @Benchmark
  def distanceOfEmptyVsEmpty(bloomFilterState: BloomFilterState): Int =
    bloomFilterState.emptyBF1.hammingDistance(bloomFilterState.emptyBF2)

  /** Distance between an empty filter and a sparse one. */
  @Benchmark
  def distanceOfEmptyVsSparse(bloomFilterState: BloomFilterState): Int =
    bloomFilterState.emptyBF1.hammingDistance(bloomFilterState.sparseBF1)

  /** Distance between an empty filter and a dense one. */
  @Benchmark
  def distanceOfEmptyVsDense(bloomFilterState: BloomFilterState): Int =
    bloomFilterState.emptyBF1.hammingDistance(bloomFilterState.denseBF1)

  /** Distance between the two sparse filters. */
  @Benchmark
  def distanceOfSparseVsSparse(bloomFilterState: BloomFilterState): Int =
    bloomFilterState.sparseBF1.hammingDistance(bloomFilterState.sparesBF2)

  /** Distance between a sparse filter and a dense one. */
  @Benchmark
  def distanceOfSparseVsDense(bloomFilterState: BloomFilterState): Int =
    bloomFilterState.sparseBF1.hammingDistance(bloomFilterState.denseBF1)

  /**
   * Distance between the two dense filters.
   *
   * Uses `denseBF2` as the second operand so that two separately built filters
   * are compared, mirroring the sparse-vs-sparse case, rather than measuring a
   * filter against itself.
   */
  @Benchmark
  def distanceOfDenseVsDense(bloomFilterState: BloomFilterState): Int =
    bloomFilterState.denseBF1.hammingDistance(bloomFilterState.denseBF2)

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.twitter.algebird
package benchmark

import com.twitter.algebird.legacy._
import org.openjdk.jmh.annotations._

object BloomFilterLegacyQueryBenchmark {

  /**
   * Benchmark state holding a Bloom filter to query.
   *
   * The filter is built in [[setup]] from the two `@Param` fields.
   */
  @State(Scope.Benchmark)
  class BloomFilterState {

    /** Number of random strings inserted; also the expected size given to the filter. */
    @Param(Array("100", "1000", "10000"))
    var nbrOfElements: Int = 0

    /** False-positive rate handed to the Bloom filter constructor. */
    @Param(Array("0.001", "0.01"))
    var falsePositiveRate: Double = 0

    /** The filter under test, assigned in [[setup]]. */
    var bf: BF[String] = _

    /** Generates `nbrOfElements` random strings of length 10 and builds `bf` from them. */
    @Setup(Level.Trial)
    def setup(): Unit = {
      val inserted = BloomFilterCreateBenchmark.createRandomString(nbrOfElements, 10)
      val monoid = BloomFilter[String](nbrOfElements, falsePositiveRate)
      bf = monoid.create(inserted: _*)
    }
  }
}

/** Benchmark that measures a single membership query against a populated legacy Bloom filter. */
class BloomFilterLegacyQueryBenchmark {
  import BloomFilterLegacyQueryBenchmark._

  /**
   * Asks the state's filter whether it contains the fixed string "1".
   *
   * @param bloomFilterState supplies the pre-built filter
   * @return the filter's answer, returned as the benchmark result
   */
  @Benchmark
  def queryBloomFilter(bloomFilterState: BloomFilterState): ApproximateBoolean = {
    val filter = bloomFilterState.bf
    filter.contains("1")
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.twitter.algebird.immutable.compat

import com.twitter.algebird.immutable.BitSet

/**
 * Presents an algebird immutable `BitSet` through the standard `Set[Int]` interface.
 *
 * Membership, iteration, insertion and removal all delegate to the wrapped bit set.
 *
 * @param bitset the underlying bit set
 */
class BitSetWrapperSet(bitset: BitSet) extends Set[Int] {

  /** Wraps another bit set in a new `BitSetWrapperSet`. */
  private def wrapBitSet(underlying: BitSet): BitSetWrapperSet =
    new BitSetWrapperSet(underlying)

  /** True when the wrapped bit set reports `i` as present. */
  def contains(i: Int): Boolean = bitset.apply(i)

  /** Iterates over the wrapped bit set's elements. */
  def iterator: Iterator[Int] = bitset.iterator

  /** A new wrapper around the bit set with `i` added. */
  def +(i: Int): BitSetWrapperSet = wrapBitSet(bitset + i)

  /** A new wrapper around the bit set with `i` removed. */
  def -(i: Int): BitSetWrapperSet = wrapBitSet(bitset - i)

  /** The empty set, obtained from `BitSet.Empty.toSet`. */
  override def empty: Set[Int] = BitSet.Empty.toSet
}
Loading