Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2814897

Browse files
Lukas Rupprecht authored and allisonport-db committed
Fixes bug in DeltaTableUtils.findDeltaTableRoot
Previously, DeltaTableUtils.findDeltaTableRoot threw an exception when it was passed a base path that converts to a URI with an empty path component (e.g. `s3://my-bucket`). This PR detects such cases and prepends a slash to the `_delta_log` subdirectory when combining it with the base path. It also adds a new test suite for DeltaTableUtils.

GitOrigin-RevId: ebf74770dc3b0cdfddeadb97114d38cb00802995
1 parent c2baa30 commit 2814897

File tree

2 files changed

+65
-1
lines changed

2 files changed

+65
-1
lines changed

core/src/main/scala/org/apache/spark/sql/delta/DeltaTable.scala

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ object DeltaTableUtils extends PredicateHelper
193193
var currentPath = path
194194
while (currentPath != null && currentPath.getName != "_delta_log" &&
195195
currentPath.getName != "_samples") {
196-
val deltaLogPath = new Path(currentPath, "_delta_log")
196+
val deltaLogPath = safeConcatPaths(currentPath, "_delta_log")
197197
if (Try(fs.exists(deltaLogPath)).getOrElse(false)) {
198198
return Option(currentPath)
199199
}
@@ -397,4 +397,25 @@ object DeltaTableUtils extends PredicateHelper
397397
/**
 * Wraps a single column name in an identity transform.
 *
 * @param col the column name; it becomes the only part of the field reference
 * @return an IdentityTransform over a FieldReference built from `Seq(col)`
 */
def parseColToTransform(col: String): IdentityTransform = {
  val columnRef = FieldReference(Seq(col))
  IdentityTransform(columnRef)
}
400+
401+
/**
 * Uses org.apache.hadoop.fs.Path(Path, String) to concatenate a base path and a child path,
 * and safely handles the case where the base path represents a Uri with an empty path
 * component (e.g. s3://my-bucket, where my-bucket would be interpreted as the Uri authority).
 *
 * In that case, the child path is converted to an absolute path at the root, i.e. /childPath.
 * This prevents a "URISyntaxException: Relative path in absolute URI", which would be thrown
 * by org.apache.hadoop.fs.Path(Path, String) because it tries to convert the base path to a
 * Uri and then resolve the child on top of it. This is invalid for an empty base path and a
 * relative child path according to the Uri specification, which states that if an authority
 * is defined, the path component needs to be either empty or start with a '/'.
 *
 * Any leading slashes already present on the child are collapsed into that single root slash.
 * Blindly prepending another one would produce "//childPath", which Hadoop's Path parser
 * reads as a Uri authority, so the result would point at a different bucket/host instead of
 * at a child of the base path.
 *
 * @param basePath          the parent path; its Uri path component may be empty
 * @param relativeChildPath the child to append, e.g. "_delta_log"
 * @return the combined path; rooted at "/" under the base authority when the base path's
 *         Uri has an empty path component
 */
def safeConcatPaths(basePath: Path, relativeChildPath: String): Path = {
  val child =
    if (basePath.toUri.getPath.isEmpty) {
      // Anchor the child at the root with exactly one leading slash.
      s"/${relativeChildPath.dropWhile(_ == '/')}"
    } else {
      relativeChildPath
    }
  new Path(basePath, child)
}
400421
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright (2021) The Delta Lake Project Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.apache.spark.sql.delta
18+
19+
// scalastyle:off import.ordering.noEmptyLine
20+
import org.apache.hadoop.fs.Path
21+
22+
import org.apache.spark.SparkConf
23+
import org.apache.spark.sql.test.SharedSparkSession
24+
25+
/**
 * Tests for the path handling in DeltaTableUtils: locating a Delta table root and
 * concatenating a base path with a child path.
 */
class DeltaTableUtilsSuite extends SharedSparkSession {

  test("findDeltaTableRoot correctly combines paths") {
    withTempDir { dir =>
      // Drop the table once the test body finishes; otherwise the catalog entry would
      // outlive the temporary directory it points at and leak into later tests.
      withTable("myTable") {
        sql(s"CREATE TABLE myTable (id INT) USING DELTA LOCATION '${dir.getAbsolutePath}'")
        val path = new Path(s"file://${dir.getAbsolutePath}")
        assert(DeltaTableUtils.findDeltaTableRoot(spark, path).contains(path))
      }
    }
  }

  test("safeConcatPaths") {
    // Base path with a non-empty path component: the child is appended below it.
    val basePath = new Path("s3://my-bucket/subfolder")
    assert(DeltaTableUtils.safeConcatPaths(basePath, "_delta_log") ==
      new Path("s3://my-bucket/subfolder/_delta_log"))

    // Base path consisting only of scheme and authority: the child is placed at the root.
    val basePathEmpty = new Path("s3://my-bucket")
    assert(DeltaTableUtils.safeConcatPaths(basePathEmpty, "_delta_log") ==
      new Path("s3://my-bucket/_delta_log"))
  }
}

0 commit comments

Comments
 (0)