Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 124 additions & 18 deletions core/src/main/java/org/apache/iceberg/rest/RESTUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.hc.core5.net.PercentCodec;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
Expand Down Expand Up @@ -144,22 +145,31 @@ public static Map<String, String> decodeFormData(String formString) {
}

/**
* Encodes a string using URL encoding
* Encodes a string using application/x-www-form-urlencoded encoding, where spaces are encoded as
* {@code +}.
*
* <p>This method is suitable for encoding form data (e.g. OAuth2 token requests) but <b>not</b>
* for URL path segments, where {@code +} is a literal character. Use {@link
* #encodePathSegment(String)} for path segments.
*
* <p>{@link #decodeString(String)} should be used to decode.
*
* @param toEncode string to encode
* @return UTF-8 encoded string, suitable for use as a URL parameter
* @return form-encoded string, suitable for use in application/x-www-form-urlencoded content
*/
public static String encodeString(String toEncode) {
  Preconditions.checkArgument(toEncode != null, "Invalid string to encode: null");
  // URLEncoder produces application/x-www-form-urlencoded output, i.e. a space becomes '+'
  String formEncoded = URLEncoder.encode(toEncode, StandardCharsets.UTF_8);
  return formEncoded;
}

/**
* Decodes a URL-encoded string.
* Decodes a string that was encoded using application/x-www-form-urlencoded encoding, where
* {@code +} is decoded as a space.
*
* <p>This method is suitable for decoding form data but <b>not</b> for URL path segments. Use
* {@link #decodePathSegment(String)} for path segments.
*
* <p>See also {@link #encodeString(String)} for URL encoding.
* <p>See also {@link #encodeString(String)} for form encoding.
*
* @param encoded a string to decode
* @return a decoded string
Expand All @@ -169,6 +179,39 @@ public static String decodeString(String encoded) {
return URLDecoder.decode(encoded, StandardCharsets.UTF_8);
}

/**
 * Percent-encodes a string so that it can be placed into a URL path segment, following RFC 3986.
 *
 * <p>In contrast to {@link #encodeString(String)}, a space becomes {@code %20} rather than
 * {@code +}; other characters outside the unreserved set are percent-encoded as well.
 *
 * <p>The inverse operation is {@link #decodePathSegment(String)}.
 *
 * @param segment the raw path segment to encode, must not be null
 * @return the percent-encoded form of {@code segment}, suitable for use in URL path segments
 */
public static String encodePathSegment(String segment) {
  Preconditions.checkArgument(segment != null, "Invalid string to encode: null");
  String percentEncoded = PercentCodec.RFC3986.encode(segment);
  return percentEncoded;
}

/**
 * Reverses percent-encoding of a URL path segment as defined by RFC 3986. A {@code +} in the
 * input is kept as a literal {@code +}; it is <b>not</b> converted to a space the way {@link
 * #decodeString(String)} does.
 *
 * <p>Note: although this method ships in this release, server-side decoding paths such as {@link
 * #decodeNamespace(String, String)} do not call it yet. They are expected to switch over in a
 * future release, once clients that encode with {@link #encodePathSegment(String)} have been
 * widely deployed.
 *
 * <p>The matching encoder is {@link #encodePathSegment(String)}.
 *
 * @param encoded a percent-encoded path segment, must not be null
 * @return the decoded string
 */
public static String decodePathSegment(String encoded) {
  Preconditions.checkArgument(encoded != null, "Invalid string to decode: null");
  String plain = PercentCodec.RFC3986.decode(encoded);
  return plain;
}

/**
* This converts the given namespace to a string and separates each part in a multipart namespace
* using the unicode character '\u001f'. Note that this method is different from {@link
Expand Down Expand Up @@ -254,12 +297,13 @@ public static Namespace namespaceFromQueryParam(
}

/**
* Returns a String representation of a namespace that is suitable for use in a URL / URI.
* Returns a String representation of a namespace that is suitable for use with
* application/x-www-form-urlencoded encoding.
*
* <p>This function needs to be called when a namespace is used as a path variable (or query
* parameter etc.), to format the namespace per the spec.
* <p>This function needs to be called when a namespace is used in a POST request body, to format
* the namespace per the spec.
*
* <p>{@link #decodeNamespace} should be used to parse the namespace from a URL parameter.
* <p>{@link #decodeNamespace} should be used to parse the namespace from a request body.
*
* @param ns namespace to encode
* @return form-encoded string representing the namespace, suitable for use in
* application/x-www-form-urlencoded content
Expand All @@ -272,13 +316,14 @@ public static String encodeNamespace(Namespace ns) {
}

/**
* Returns a String representation of a namespace that is suitable for use in a URL / URI.
* Returns a String representation of a namespace that is suitable for use with
* application/x-www-form-urlencoded encoding.
*
* <p>This function needs to be called when a namespace is used as a path variable (or query
* parameter etc.), to format the namespace per the spec.
* <p>This function needs to be called when a namespace is used in a POST request body, to format
* the namespace per the spec.
*
* <p>{@link RESTUtil#decodeNamespace(String, String)} should be used to parse the namespace from
* a URL parameter.
* a request body.
*
* @param namespace namespace to encode
* @param separator The namespace separator to be used for encoding. The separator will be used
Expand All @@ -300,10 +345,10 @@ public static String encodeNamespace(Namespace namespace, String separator) {
}

/**
* Takes in a string representation of a namespace as used for a URL parameter and returns the
* corresponding namespace.
* Takes in a string representation of a namespace encoded with application/x-www-form-urlencoded
* encoding, and returns the corresponding namespace.
*
* <p>See also {@link #encodeNamespace} for generating correctly formatted URLs.
* <p>See also {@link #encodeNamespace} for generating correctly formatted POST requests.
*
* @param encodedNs a namespace to decode
* @return a namespace
Expand All @@ -316,10 +361,10 @@ public static Namespace decodeNamespace(String encodedNs) {
}

/**
* Takes in a string representation of a namespace as used for a URL parameter and returns the
* corresponding namespace.
* Takes in a string representation of a namespace encoded with application/x-www-form-urlencoded
* encoding, and returns the corresponding namespace.
*
* <p>See also {@link #encodeNamespace} for generating correctly formatted URLs.
* <p>See also {@link #encodeNamespace} for generating correctly formatted POST requests.
*
* @param encodedNamespace a namespace to decode
* @param separator The namespace separator to be used as-is for decoding. This should be the same
Expand Down Expand Up @@ -348,6 +393,67 @@ public static Namespace decodeNamespace(String encodedNamespace, String separato
return Namespace.of(levels);
}

/**
 * Builds the RFC 3986 path-segment representation of a namespace: every level is percent-encoded
 * on its own (a space becomes {@code %20}, not {@code +}) and the encoded levels are then joined
 * with the given separator.
 *
 * <p>Prefer this over {@link #encodeNamespace(Namespace, String)} whenever the result ends up in
 * a URL path.
 *
 * <p>Use {@link #decodeNamespaceAsPathSegment(String, String)} to turn the result back into a
 * namespace.
 *
 * @param namespace namespace to encode, must not be null
 * @param separator the separator placed between encoded levels, must not be null or empty; it is
 *     inserted as-is and is not itself encoded
 * @return percent-encoded string representing the namespace, suitable for use in URL path
 *     segments
 */
public static String encodeNamespaceAsPathSegment(Namespace namespace, String separator) {
  Preconditions.checkArgument(namespace != null, "Invalid namespace: null");
  Preconditions.checkArgument(
      !Strings.isNullOrEmpty(separator), "Invalid separator: null or empty");

  String[] rawLevels = namespace.levels();
  // write into a separate array so the array returned by levels() is never modified
  String[] escapedLevels = new String[rawLevels.length];
  int position = 0;
  for (String rawLevel : rawLevels) {
    escapedLevels[position] = encodePathSegment(rawLevel);
    position += 1;
  }

  Joiner levelJoiner = Joiner.on(separator);
  return levelJoiner.join(escapedLevels);
}

/**
 * Parses a percent-encoded (RFC 3986) URL path segment back into a namespace. A literal {@code +}
 * stays a {@code +}; it is <b>not</b> turned into a space as {@link #decodeNamespace(String,
 * String)} would do.
 *
 * <p>This is the counterpart of {@link #encodeNamespaceAsPathSegment(Namespace, String)}, which
 * should be used for encoding path segments.
 *
 * @param encodedNamespace a percent-encoded namespace path segment, must not be null
 * @param separator the namespace separator used during encoding, must not be null or empty
 * @return a namespace whose levels are the decoded parts of {@code encodedNamespace}
 */
public static Namespace decodeNamespaceAsPathSegment(String encodedNamespace, String separator) {
  Preconditions.checkArgument(encodedNamespace != null, "Invalid namespace: null");
  Preconditions.checkArgument(
      !Strings.isNullOrEmpty(separator), "Invalid separator: null or empty");

  // an old client may have joined the levels with %1F; when that legacy separator is present in
  // the input it wins over the supplied separator to stay backwards compatible
  String effectiveSeparator = separator;
  if (encodedNamespace.contains(NAMESPACE_SEPARATOR_URLENCODED_UTF_8)) {
    effectiveSeparator = NAMESPACE_SEPARATOR_URLENCODED_UTF_8;
  }

  Iterable<String> encodedLevels = Splitter.on(effectiveSeparator).split(encodedNamespace);
  String[] namespaceLevels = Iterables.toArray(encodedLevels, String.class);

  // decode each level in place, only after splitting
  for (int pos = 0; pos < namespaceLevels.length; pos += 1) {
    namespaceLevels[pos] = decodePathSegment(namespaceLevels[pos]);
  }

  return Namespace.of(namespaceLevels);
}

/**
* Returns the catalog URI suffixed by the relative endpoint path. If the endpoint path is an
* absolute path, then the absolute endpoint path is returned without using the catalog URI.
Expand Down
16 changes: 8 additions & 8 deletions core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public String table(TableIdentifier ident) {
"namespaces",
pathEncode(ident.namespace()),
"tables",
RESTUtil.encodeString(ident.name()));
RESTUtil.encodePathSegment(ident.name()));
}

public String register(Namespace ns) {
Expand All @@ -126,7 +126,7 @@ public String metrics(TableIdentifier identifier) {
"namespaces",
pathEncode(identifier.namespace()),
"tables",
RESTUtil.encodeString(identifier.name()),
RESTUtil.encodePathSegment(identifier.name()),
"metrics");
}

Expand All @@ -145,7 +145,7 @@ public String view(TableIdentifier ident) {
"namespaces",
pathEncode(ident.namespace()),
"views",
RESTUtil.encodeString(ident.name()));
RESTUtil.encodePathSegment(ident.name()));
}

public String renameView() {
Expand All @@ -163,7 +163,7 @@ public String planTableScan(TableIdentifier ident) {
"namespaces",
pathEncode(ident.namespace()),
"tables",
RESTUtil.encodeString(ident.name()),
RESTUtil.encodePathSegment(ident.name()),
"plan");
}

Expand All @@ -174,9 +174,9 @@ public String plan(TableIdentifier ident, String planId) {
"namespaces",
pathEncode(ident.namespace()),
"tables",
RESTUtil.encodeString(ident.name()),
RESTUtil.encodePathSegment(ident.name()),
"plan",
RESTUtil.encodeString(planId));
RESTUtil.encodePathSegment(planId));
}

public String fetchScanTasks(TableIdentifier ident) {
Expand All @@ -186,11 +186,11 @@ public String fetchScanTasks(TableIdentifier ident) {
"namespaces",
pathEncode(ident.namespace()),
"tables",
RESTUtil.encodeString(ident.name()),
RESTUtil.encodePathSegment(ident.name()),
"tasks");
}

private String pathEncode(Namespace ns) {
return RESTUtil.encodeNamespace(ns, namespaceSeparator);
return RESTUtil.encodeNamespaceAsPathSegment(ns, namespaceSeparator);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -723,22 +723,22 @@ public static void configureResponseFromException(
}

private static Namespace namespaceFromPathVars(Map<String, String> pathVars) {
return RESTUtil.decodeNamespace(
return RESTUtil.decodeNamespaceAsPathSegment(
pathVars.get("namespace"), NAMESPACE_SEPARATOR_URLENCODED_UTF_8);
}

private static TableIdentifier tableIdentFromPathVars(Map<String, String> pathVars) {
return TableIdentifier.of(
namespaceFromPathVars(pathVars), RESTUtil.decodeString(pathVars.get("table")));
namespaceFromPathVars(pathVars), RESTUtil.decodePathSegment(pathVars.get("table")));
}

private static TableIdentifier viewIdentFromPathVars(Map<String, String> pathVars) {
return TableIdentifier.of(
namespaceFromPathVars(pathVars), RESTUtil.decodeString(pathVars.get("view")));
namespaceFromPathVars(pathVars), RESTUtil.decodePathSegment(pathVars.get("view")));
}

private static String planIDFromPathVars(Map<String, String> pathVars) {
return RESTUtil.decodeString(pathVars.get("plan-id"));
return RESTUtil.decodePathSegment(pathVars.get("plan-id"));
}

private static SnapshotMode snapshotModeFromQueryParams(Map<String, String> queryParams) {
Expand Down
22 changes: 22 additions & 0 deletions core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -3954,6 +3954,28 @@ public Table registerTable(
}
}

@Test
public void testLoadTableWithSpecialChars() {
  // every namespace level and the table name mix spaces with '?', '=', '-' and '+'
  Namespace parentNamespace = Namespace.of("ns 1 ?=-+");
  Namespace nestedNamespace = Namespace.of("ns 1 ?=-+", "ns 2 ?=-+");
  TableIdentifier ident = TableIdentifier.of(nestedNamespace, "table 1 ?=-+");

  if (requiresNamespaceCreate()) {
    // parent first, then the nested namespace
    restCatalog.createNamespace(parentNamespace);
    restCatalog.createNamespace(nestedNamespace);
  }

  restCatalog.buildTable(ident, SCHEMA).create();
  assertThat(restCatalog.tableExists(ident)).as("Table should exist").isTrue();

  // the metadata location is expected to contain the unencoded namespace levels and table name
  HasTableOperations loaded = (HasTableOperations) restCatalog.loadTable(ident);
  assertThat(loaded.operations().current().metadataFileLocation())
      .contains("ns 1 ?=-+/ns 2 ?=-+/table 1 ?=-+");
}

private RESTCatalog catalog(RESTCatalogAdapter adapter) {
RESTCatalog catalog =
new RESTCatalog(SessionCatalog.SessionContext.createEmpty(), (config) -> adapter);
Expand Down
Loading
Loading