Skip to content

Commit 76c958e

Browse files
Mike Pigott authored and kou committed
ARROW-3965 [Java] JDBC-To-Arrow Configuration
https://issues.apache.org/jira/browse/ARROW-3965 This creates an object which configures the BaseAllocator and Calendar used during the translation from a JDBC ResultSet to an Arrow vector. Author: Mike Pigott <[email protected]> Author: Michael Pigott <[email protected]> Closes apache#3133 from mikepigott/jdbc-to-arrow-config and squashes the following commits: be95426 <Mike Pigott> ARROW-3965: JDBC-To-Arrow Config Builder javadocs. d6c64a7 <Mike Pigott> ARROW-3965: JdbcToArrowConfigBuilder d7ca982 <Mike Pigott> Merge branch 'master' into jdbc-to-arrow-config 789c8c8 <Michael Pigott> Merge pull request #4 from apache/master e5b19ee <Michael Pigott> Merge pull request #3 from apache/master 3b17c29 <Michael Pigott> Merge pull request #2 from apache/master 5b1b364 <Mike Pigott> Merge branch 'master' into jdbc-to-arrow-config 881c6c8 <Michael Pigott> Merge pull request #1 from apache/master bb3165b <Mike Pigott> Updating the function calls to use the JdbcToArrowConfig versions. 68c91e7 <Mike Pigott> Modifying the jdbcToArrowSchema and jdbcToArrowVectors methods to receive JdbcToArrowConfig objects. 8d6cf00 <Mike Pigott> Documentation for public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) 4f1260c <Mike Pigott> Adding documentation for public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) df632e3 <Mike Pigott> Updating the SQL tests to include JdbcToArrowConfig versions. b270044 <Mike Pigott> Updated validation & documentation, and unit tests for the new JdbcToArrowConfig. da77cbe <Mike Pigott> Creating a configuration class for the JDBC-to-Arrow converter.
1 parent a90ce48 commit 76c958e

File tree

11 files changed

+410
-22
lines changed

11 files changed

+410
-22
lines changed

adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,9 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
8989
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
9090
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
9191

92-
return sqlToArrow(connection, query, allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
92+
JdbcToArrowConfig config =
93+
new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
94+
return sqlToArrow(connection, query, config);
9395
}
9496

9597
/**
@@ -115,8 +117,29 @@ public static VectorSchemaRoot sqlToArrow(
115117
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
116118
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
117119

120+
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
121+
}
122+
123+
/**
124+
* For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
125+
*
126+
* @param connection Database connection to be used. This method will not close the passed connection object.
127+
* Since the caller has passed the connection object it's the responsibility of the caller
128+
* to close or return the connection to the pool.
129+
* @param query The DB Query to fetch the data.
130+
* @param config Configuration of the conversion from JDBC to Arrow.
131+
* @return Arrow Data Objects {@link VectorSchemaRoot}
132+
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
133+
* ResultSet and Statement objects.
134+
*/
135+
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config)
136+
throws SQLException, IOException {
137+
Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
138+
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
139+
Preconditions.checkNotNull(config, "The configuration cannot be null");
140+
118141
try (Statement stmt = connection.createStatement()) {
119-
return sqlToArrow(stmt.executeQuery(query), allocator, calendar);
142+
return sqlToArrow(stmt.executeQuery(query), config);
120143
}
121144
}
122145

@@ -147,7 +170,9 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
147170
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
148171
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
149172

150-
return sqlToArrow(resultSet, allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
173+
JdbcToArrowConfig config =
174+
new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
175+
return sqlToArrow(resultSet, config);
151176
}
152177

153178
/**
@@ -162,10 +187,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
162187
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
163188
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
164189

165-
RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
166-
VectorSchemaRoot root = sqlToArrow(resultSet, rootAllocator, calendar);
167-
168-
return root;
190+
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
169191
}
170192

171193
/**
@@ -183,9 +205,25 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
183205
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
184206
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
185207

208+
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
209+
}
210+
211+
/**
212+
* For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
213+
*
214+
* @param resultSet ResultSet to use to fetch the data from underlying database
215+
* @param config Configuration of the conversion from JDBC to Arrow.
216+
* @return Arrow Data Objects {@link VectorSchemaRoot}
217+
* @throws SQLException on error
218+
*/
219+
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config)
220+
throws SQLException, IOException {
221+
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
222+
Preconditions.checkNotNull(config, "The configuration cannot be null");
223+
186224
VectorSchemaRoot root = VectorSchemaRoot.create(
187-
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), calendar), allocator);
188-
JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, calendar);
225+
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
226+
JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config);
189227
return root;
190228
}
191229
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.adapter.jdbc;
19+
20+
import java.util.Calendar;
21+
22+
import org.apache.arrow.memory.BaseAllocator;
23+
24+
import com.google.common.base.Preconditions;
25+
26+
/**
27+
* This class configures the JDBC-to-Arrow conversion process.
28+
* <p>
29+
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
30+
* and the calendar is used to define the time zone of any {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
31+
* fields that are created during the conversion.
32+
* </p>
33+
* <p>
34+
* Neither field may be <code>null</code>.
35+
* </p>
36+
*/
37+
public final class JdbcToArrowConfig {
38+
private Calendar calendar;
39+
private BaseAllocator allocator;
40+
41+
/**
42+
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
43+
* is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
44+
* Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
45+
*
46+
* @param allocator The memory allocator to construct the Arrow vectors with.
47+
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
48+
*/
49+
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
50+
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
51+
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
52+
53+
this.allocator = allocator;
54+
this.calendar = calendar;
55+
}
56+
57+
/**
58+
* The calendar to use when defining Arrow Timestamp fields
59+
* and retrieving time-based fields from the database.
60+
* @return the calendar.
61+
*/
62+
public Calendar getCalendar() {
63+
return calendar;
64+
}
65+
66+
/**
67+
* The Arrow memory allocator.
68+
* @return the allocator.
69+
*/
70+
public BaseAllocator getAllocator() {
71+
return allocator;
72+
}
73+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.adapter.jdbc;
19+
20+
import java.util.Calendar;
21+
22+
import org.apache.arrow.memory.BaseAllocator;
23+
24+
import com.google.common.base.Preconditions;
25+
26+
/**
27+
* This class builds {@link JdbcToArrowConfig}s.
28+
*/
29+
public class JdbcToArrowConfigBuilder {
30+
private Calendar calendar;
31+
private BaseAllocator allocator;
32+
33+
/**
34+
* Default constructor for the <code>JdbcToArrowConfigBuilder</code>.
35+
* Use the setter methods for the allocator and calendar; both must be
36+
* set. Otherwise, {@link #build()} will throw a {@link NullPointerException}.
37+
*/
38+
public JdbcToArrowConfigBuilder() {
39+
this.allocator = null;
40+
this.calendar = null;
41+
}
42+
43+
/**
44+
* Constructor for the <code>JdbcToArrowConfigBuilder</code>. Both the
45+
* allocator and calendar are required. A {@link NullPointerException}
46+
* will be thrown if one of the arguments is <code>null</code>.
47+
* <p>
48+
* The allocator is used to construct Arrow vectors from the JDBC ResultSet.
49+
* The calendar is used to determine the time zone of {@link java.sql.Timestamp}
50+
* fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
51+
* {@link java.sql.Timestamp} fields to a single, common time zone when reading
52+
* from the result set.
53+
* </p>
54+
*
55+
* @param allocator The Arrow Vector memory allocator.
56+
* @param calendar The calendar to use when constructing timestamp fields.
57+
*/
58+
public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar) {
59+
this();
60+
61+
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
62+
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
63+
64+
this.allocator = allocator;
65+
this.calendar = calendar;
66+
}
67+
68+
/**
69+
* Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet.
70+
*
71+
* @param allocator the allocator to set.
72+
* @exception NullPointerException if <code>allocator</code> is null.
73+
*/
74+
public JdbcToArrowConfigBuilder setAllocator(BaseAllocator allocator) {
75+
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
76+
this.allocator = allocator;
77+
return this;
78+
}
79+
80+
/**
81+
* Sets the {@link Calendar} to use when constructing timestamp fields in the
82+
* Arrow schema, and reading time-based fields from the JDBC <code>ResultSet</code>.
83+
*
84+
* @param calendar the calendar to set.
85+
* @exception NullPointerException if <code>calendar</code> is <code>null</code>.
86+
*/
87+
public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
88+
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
89+
this.calendar = calendar;
90+
return this;
91+
}
92+
93+
/**
94+
* This builds the {@link JdbcToArrowConfig} from the provided
95+
* {@link BaseAllocator} and {@link Calendar}.
96+
*
97+
* @return The built {@link JdbcToArrowConfig}
98+
* @throws NullPointerException if either the allocator or calendar was not set.
99+
*/
100+
public JdbcToArrowConfig build() {
101+
return new JdbcToArrowConfig(allocator, calendar);
102+
}
103+
}

adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.Calendar;
3939
import java.util.List;
4040

41+
import org.apache.arrow.memory.RootAllocator;
4142
import org.apache.arrow.vector.BaseFixedWidthVector;
4243
import org.apache.arrow.vector.BigIntVector;
4344
import org.apache.arrow.vector.BitVector;
@@ -90,6 +91,21 @@ public class JdbcToArrowUtils {
9091
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
9192
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;
9293

94+
/**
95+
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
96+
*
97+
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
98+
* @param calendar The calendar whose time zone is used to construct Timestamp fields.
99+
* @return {@link Schema}
100+
* @throws SQLException on error
101+
*/
102+
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
103+
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
104+
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
105+
106+
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar));
107+
}
108+
93109
/**
94110
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
95111
*
@@ -120,14 +136,14 @@ public class JdbcToArrowUtils {
120136
* CLOB --> ArrowType.Utf8
121137
* BLOB --> ArrowType.Binary
122138
*
123-
* @param rsmd ResultSetMetaData
139+
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
140+
* @param config The configuration to use when constructing the schema.
124141
* @return {@link Schema}
125142
* @throws SQLException on error
126143
*/
127-
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
128-
144+
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
129145
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
130-
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
146+
Preconditions.checkNotNull(config, "The configuration object must not be null");
131147

132148
List<Field> fields = new ArrayList<>();
133149
int columnCount = rsmd.getColumnCount();
@@ -179,7 +195,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
179195
break;
180196
case Types.TIMESTAMP:
181197
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND,
182-
calendar.getTimeZone().getID())), null));
198+
config.getCalendar().getTimeZone().getID())), null));
183199
break;
184200
case Types.BINARY:
185201
case Types.VARBINARY:
@@ -222,17 +238,37 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
222238
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
223239
* the given Arrow Vector objects.
224240
*
225-
* @param rs ResultSet to use to fetch the data from underlying database
226-
* @param root Arrow {@link VectorSchemaRoot} object to populate
241+
* @param rs ResultSet to use to fetch the data from underlying database
242+
* @param root Arrow {@link VectorSchemaRoot} object to populate
243+
* @param calendar The calendar to use when reading time-based data.
227244
* @throws SQLException on error
228245
*/
229246
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
230247
throws SQLException, IOException {
231248

232249
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
233-
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
250+
Preconditions.checkNotNull(root, "Vector Schema cannot be null");
234251
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
235252

253+
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
254+
}
255+
256+
/**
257+
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
258+
* the given Arrow Vector objects.
259+
*
260+
* @param rs ResultSet to use to fetch the data from underlying database
261+
* @param root Arrow {@link VectorSchemaRoot} object to populate
262+
* @param config The configuration to use when reading the data.
263+
* @throws SQLException on error
264+
*/
265+
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config)
266+
throws SQLException, IOException {
267+
268+
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
269+
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
270+
Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null");
271+
236272
ResultSetMetaData rsmd = rs.getMetaData();
237273
int columnCount = rsmd.getColumnCount();
238274

@@ -289,16 +325,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
289325
break;
290326
case Types.DATE:
291327
updateVector((DateMilliVector) root.getVector(columnName),
292-
rs.getDate(i, calendar), !rs.wasNull(), rowCount);
328+
rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount);
293329
break;
294330
case Types.TIME:
295331
updateVector((TimeMilliVector) root.getVector(columnName),
296-
rs.getTime(i, calendar), !rs.wasNull(), rowCount);
332+
rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount);
297333
break;
298334
case Types.TIMESTAMP:
299335
// TODO: Need to handle precision such as milli, micro, nano
300336
updateVector((TimeStampVector) root.getVector(columnName),
301-
rs.getTimestamp(i, calendar), !rs.wasNull(), rowCount);
337+
rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount);
302338
break;
303339
case Types.BINARY:
304340
case Types.VARBINARY:

0 commit comments

Comments
 (0)