Skip to content

Commit fe7da8a

Browse files
liyafan82Pindikura Ravindra
authored andcommitted
ARROW-6355: [Java] Make range equal visitor reusable
According to the discussion in apache#4993 (comment), we often encountered this scenario: we compare values repeatedly. The comparisons differs only in the parameters (vector to compare, start index, etc). According to the current API, we have to create a new RangeEqualVisitor object each time the comparison is performed. This leads to non-trivial performance overhead. To address this problem, we make the RangeEqualVisitor reusable, and allow the client to change parameters of an existing visitor. Closes apache#5195 from liyafan82/fly_0826_reuse and squashes the following commits: ffe0e6a <liyafan82> Merge pull request #1 from pravindra/pull-5195 073bc78 <Pindikura Ravindra> Test: Move out Range from the visitor params 7482414 <liyafan82> Wrapper visit parameters into a pojo 53c1e0b <liyafan82> Merge branch 'master' into fly_0826_reuse a1f7046 <liyafan82> Make range equal visitor reusable Lead-authored-by: liyafan82 <[email protected]> Co-authored-by: Pindikura Ravindra <[email protected]> Co-authored-by: liyafan82 <[email protected]> Signed-off-by: Pindikura Ravindra <[email protected]>
1 parent 02554c6 commit fe7da8a

File tree

9 files changed

+374
-367
lines changed

9 files changed

+374
-367
lines changed

algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.arrow.vector.BitVectorHelper;
2323
import org.apache.arrow.vector.IntVector;
2424
import org.apache.arrow.vector.ValueVector;
25+
import org.apache.arrow.vector.compare.Range;
2526
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
2627

2728
import io.netty.buffer.ArrowBuf;
@@ -43,11 +44,11 @@ public static <V extends ValueVector> void populateRunStartIndicators(V vector,
4344
runStarts.setZero(0, bufSize);
4445

4546
BitVectorHelper.setValidityBitToOne(runStarts, 0);
46-
47+
RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, vector, false);
48+
Range range = new Range(0, 0, 1);
4749
for (int i = 1; i < vector.getValueCount(); i++) {
48-
RangeEqualsVisitor visitor = new RangeEqualsVisitor(
49-
vector, i - 1, i, /* length */1, /* need check type*/false);
50-
if (!visitor.equals(vector)) {
50+
range.setLeftStart(i).setRightStart(i - 1);
51+
if (!visitor.rangeEquals(range)) {
5152
BitVectorHelper.setValidityBitToOne(runStarts, i);
5253
}
5354
}

vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java

Lines changed: 35 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,10 @@
1717

1818
package org.apache.arrow.vector.compare;
1919

20-
import java.util.List;
21-
2220
import org.apache.arrow.vector.BaseFixedWidthVector;
23-
import org.apache.arrow.vector.FieldVector;
2421
import org.apache.arrow.vector.Float4Vector;
2522
import org.apache.arrow.vector.Float8Vector;
2623
import org.apache.arrow.vector.ValueVector;
27-
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
28-
import org.apache.arrow.vector.complex.FixedSizeListVector;
29-
import org.apache.arrow.vector.complex.ListVector;
30-
import org.apache.arrow.vector.complex.NonNullableStructVector;
31-
import org.apache.arrow.vector.complex.UnionVector;
3224

3325
/**
3426
* Visitor to compare floating point.
@@ -49,24 +41,16 @@ public class ApproxEqualsVisitor extends RangeEqualsVisitor {
4941
private DiffFunction<Double> doubleDiffFunction =
5042
(Double value1, Double value2) -> Math.abs(value1 - value2);
5143

52-
public ApproxEqualsVisitor(ValueVector right, float epsilon) {
53-
this (right, epsilon, epsilon, true);
54-
}
55-
56-
public ApproxEqualsVisitor(ValueVector right, float floatEpsilon, double doubleEpsilon) {
57-
this (right, floatEpsilon, doubleEpsilon, true);
58-
}
59-
60-
public ApproxEqualsVisitor(ValueVector right, float floatEpsilon, double doubleEpsilon, boolean typeCheckNeeded) {
61-
this (right, floatEpsilon, doubleEpsilon, typeCheckNeeded, 0, 0, right.getValueCount());
62-
}
63-
6444
/**
65-
* Construct an instance.
45+
* Constructs a new instance.
46+
*
47+
* @param left left vector
48+
* @param right right vector
49+
* @param floatEpsilon difference for float values
50+
* @param doubleEpsilon difference for double values
6651
*/
67-
public ApproxEqualsVisitor(ValueVector right, float floatEpsilon, double doubleEpsilon, boolean typeCheckNeeded,
68-
int leftStart, int rightStart, int length) {
69-
super(right, rightStart, leftStart, length, typeCheckNeeded);
52+
public ApproxEqualsVisitor(ValueVector left, ValueVector right, float floatEpsilon, double doubleEpsilon) {
53+
super(left, right, true);
7054
this.floatEpsilon = floatEpsilon;
7155
this.doubleEpsilon = doubleEpsilon;
7256
}
@@ -80,151 +64,38 @@ public void setDoubleDiffFunction(DiffFunction<Double> doubleDiffFunction) {
8064
}
8165

8266
@Override
83-
public Boolean visit(BaseFixedWidthVector left, Void value) {
67+
public Boolean visit(BaseFixedWidthVector left, Range range) {
8468
if (left instanceof Float4Vector) {
85-
return validate(left) && float4ApproxEquals((Float4Vector) left);
69+
return float4ApproxEquals(range);
8670
} else if (left instanceof Float8Vector) {
87-
return validate(left) && float8ApproxEquals((Float8Vector) left);
71+
return float8ApproxEquals(range);
8872
} else {
89-
return super.visit(left, value);
73+
return super.visit(left, range);
9074
}
9175
}
9276

9377
@Override
94-
protected boolean compareUnionVectors(UnionVector left) {
95-
96-
UnionVector rightVector = (UnionVector) right;
97-
98-
List<FieldVector> leftChildren = left.getChildrenFromFields();
99-
List<FieldVector> rightChildren = rightVector.getChildrenFromFields();
100-
101-
if (leftChildren.size() != rightChildren.size()) {
102-
return false;
103-
}
104-
105-
for (int k = 0; k < leftChildren.size(); k++) {
106-
ApproxEqualsVisitor visitor = new ApproxEqualsVisitor(rightChildren.get(k),
107-
floatEpsilon, doubleEpsilon);
108-
if (!leftChildren.get(k).accept(visitor, null)) {
109-
return false;
110-
}
111-
}
112-
return true;
78+
protected ApproxEqualsVisitor createInnerVisitor(ValueVector left, ValueVector right) {
79+
return new ApproxEqualsVisitor(left, right, floatEpsilon, doubleEpsilon);
11380
}
11481

115-
@Override
116-
protected boolean compareStructVectors(NonNullableStructVector left) {
82+
private boolean float4ApproxEquals(Range range) {
83+
Float4Vector leftVector = (Float4Vector) getLeft();
84+
Float4Vector rightVector = (Float4Vector) getRight();
11785

118-
NonNullableStructVector rightVector = (NonNullableStructVector) right;
86+
for (int i = 0; i < range.getLength(); i++) {
87+
int leftIndex = range.getLeftStart() + i;
88+
int rightIndex = range.getRightStart() + i;
11989

120-
if (!left.getChildFieldNames().equals(rightVector.getChildFieldNames())) {
121-
return false;
122-
}
90+
boolean isNull = leftVector.isNull(leftIndex);
12391

124-
for (String name : left.getChildFieldNames()) {
125-
ApproxEqualsVisitor visitor = new ApproxEqualsVisitor(rightVector.getChild(name),
126-
floatEpsilon, doubleEpsilon);
127-
if (!left.getChild(name).accept(visitor, null)) {
92+
if (isNull != rightVector.isNull(rightIndex)) {
12893
return false;
12994
}
130-
}
131-
132-
return true;
133-
}
134-
135-
@Override
136-
protected boolean compareListVectors(ListVector left) {
137-
138-
for (int i = 0; i < length; i++) {
139-
int leftIndex = leftStart + i;
140-
int rightIndex = rightStart + i;
141-
142-
boolean isNull = left.isNull(leftIndex);
143-
if (isNull != right.isNull(rightIndex)) {
144-
return false;
145-
}
146-
147-
int offsetWidth = BaseRepeatedValueVector.OFFSET_WIDTH;
14895

14996
if (!isNull) {
150-
final int startIndexLeft = left.getOffsetBuffer().getInt(leftIndex * offsetWidth);
151-
final int endIndexLeft = left.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth);
152-
153-
final int startIndexRight = right.getOffsetBuffer().getInt(rightIndex * offsetWidth);
154-
final int endIndexRight = right.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth);
155-
156-
if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
157-
return false;
158-
}
159-
160-
ValueVector leftDataVector = left.getDataVector();
161-
ValueVector rightDataVector = ((ListVector)right).getDataVector();
162-
163-
if (!leftDataVector.accept(new ApproxEqualsVisitor(rightDataVector, floatEpsilon, doubleEpsilon,
164-
typeCheckNeeded, startIndexLeft, startIndexRight, (endIndexLeft - startIndexLeft)), null)) {
165-
return false;
166-
}
167-
}
168-
}
169-
return true;
170-
}
171-
172-
protected boolean compareFixedSizeListVectors(FixedSizeListVector left) {
173-
174-
if (left.getListSize() != ((FixedSizeListVector)right).getListSize()) {
175-
return false;
176-
}
177-
178-
for (int i = 0; i < length; i++) {
179-
int leftIndex = leftStart + i;
180-
int rightIndex = rightStart + i;
181-
182-
boolean isNull = left.isNull(leftIndex);
183-
if (isNull != right.isNull(rightIndex)) {
184-
return false;
185-
}
186-
187-
int listSize = left.getListSize();
188-
189-
if (!isNull) {
190-
final int startIndexLeft = leftIndex * listSize;
191-
final int endIndexLeft = (leftIndex + 1) * listSize;
192-
193-
final int startIndexRight = rightIndex * listSize;
194-
final int endIndexRight = (rightIndex + 1) * listSize;
195-
196-
if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
197-
return false;
198-
}
199-
200-
ValueVector leftDataVector = left.getDataVector();
201-
ValueVector rightDataVector = ((FixedSizeListVector)right).getDataVector();
202-
203-
if (!leftDataVector.accept(new ApproxEqualsVisitor(rightDataVector, floatEpsilon, doubleEpsilon,
204-
typeCheckNeeded, startIndexLeft, startIndexRight, (endIndexLeft - startIndexLeft)), null)) {
205-
return false;
206-
}
207-
}
208-
}
209-
return true;
210-
}
211-
212-
private boolean float4ApproxEquals(Float4Vector left) {
213-
214-
for (int i = 0; i < length; i++) {
215-
int leftIndex = leftStart + i;
216-
int rightIndex = rightStart + i;
217-
218-
boolean isNull = left.isNull(leftIndex);
219-
220-
if (isNull != right.isNull(rightIndex)) {
221-
return false;
222-
}
223-
224-
if (!isNull) {
225-
226-
Float leftValue = left.get(leftIndex);
227-
Float rightValue = ((Float4Vector)right).get(rightIndex);
97+
Float leftValue = leftVector.get(leftIndex);
98+
Float rightValue = rightVector.get(rightIndex);
22899
if (leftValue.isNaN()) {
229100
return rightValue.isNaN();
230101
}
@@ -239,21 +110,24 @@ private boolean float4ApproxEquals(Float4Vector left) {
239110
return true;
240111
}
241112

242-
private boolean float8ApproxEquals(Float8Vector left) {
243-
for (int i = 0; i < length; i++) {
244-
int leftIndex = leftStart + i;
245-
int rightIndex = rightStart + i;
113+
private boolean float8ApproxEquals(Range range) {
114+
Float8Vector leftVector = (Float8Vector) getLeft();
115+
Float8Vector rightVector = (Float8Vector) getRight();
116+
117+
for (int i = 0; i < range.getLength(); i++) {
118+
int leftIndex = range.getLeftStart() + i;
119+
int rightIndex = range.getRightStart() + i;
246120

247-
boolean isNull = left.isNull(leftIndex);
121+
boolean isNull = leftVector.isNull(leftIndex);
248122

249-
if (isNull != right.isNull(rightIndex)) {
123+
if (isNull != rightVector.isNull(rightIndex)) {
250124
return false;
251125
}
252126

253127
if (!isNull) {
254128

255-
Double leftValue = left.get(leftIndex);
256-
Double rightValue = ((Float8Vector)right).get(rightIndex);
129+
Double leftValue = leftVector.get(leftIndex);
130+
Double rightValue = rightVector.get(rightIndex);
257131
if (leftValue.isNaN()) {
258132
return rightValue.isNaN();
259133
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.vector.compare;
19+
20+
/**
21+
* Wrapper for the parameters of comparing a range of values in two vectors.
22+
*/
23+
public class Range {
24+
25+
/**
26+
* Start position in the left vector.
27+
*/
28+
private int leftStart = -1;
29+
30+
/**
31+
* Start position in the right vector.
32+
*/
33+
private int rightStart = -1;
34+
35+
/**
36+
* Length of the range.
37+
*/
38+
private int length = -1;
39+
40+
41+
/**
42+
* Constructs a new instance.
43+
*/
44+
public Range() {}
45+
46+
/**
47+
* Constructs a new instance.
48+
*
49+
* @param leftStart start index in left vector
50+
* @param rightStart start index in right vector
51+
* @param length length of range
52+
*/
53+
public Range(int leftStart, int rightStart, int length) {
54+
this.leftStart = leftStart;
55+
this.rightStart = rightStart;
56+
this.length = length;
57+
}
58+
59+
public int getLeftStart() {
60+
return leftStart;
61+
}
62+
63+
public int getRightStart() {
64+
return rightStart;
65+
}
66+
67+
public int getLength() {
68+
return length;
69+
}
70+
71+
public Range setLeftStart(int leftStart) {
72+
this.leftStart = leftStart;
73+
return this;
74+
}
75+
76+
public Range setRightStart(int rightStart) {
77+
this.rightStart = rightStart;
78+
return this;
79+
}
80+
81+
public Range setLength(int length) {
82+
this.length = length;
83+
return this;
84+
}
85+
}

0 commit comments

Comments
 (0)