From d4f5ce90ac97011f29bda00f8378371ed643c0ad Mon Sep 17 00:00:00 2001 From: RENE Date: Tue, 15 Jul 2025 21:39:38 +0900 Subject: [PATCH 1/3] [MINOR] Fix flag parsing bug in FileInterpreter Fixed a bug where the dash character (-) was incorrectly included as a flag when parsing command arguments. Now only the actual flag characters after the dash are added to the flags set. Added unit tests to verify the correct parsing behavior. --- .../apache/zeppelin/file/FileInterpreter.java | 2 +- .../zeppelin/file/FileInterpreterTest.java | 181 ++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 file/src/test/java/org/apache/zeppelin/file/FileInterpreterTest.java diff --git a/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java b/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java index 8275215efa5..37f97d523be 100644 --- a/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java +++ b/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java @@ -70,7 +70,7 @@ public CommandArgs(String cmd) { private void parseArg(String arg) { if (arg.charAt(0) == '-') { // handle flags - for (int i = 0; i < arg.length(); i++) { + for (int i = 1; i < arg.length(); i++) { Character c = arg.charAt(i); flags.add(c); } diff --git a/file/src/test/java/org/apache/zeppelin/file/FileInterpreterTest.java b/file/src/test/java/org/apache/zeppelin/file/FileInterpreterTest.java new file mode 100644 index 00000000000..c48e39ee7e2 --- /dev/null +++ b/file/src/test/java/org/apache/zeppelin/file/FileInterpreterTest.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.file; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; + +import java.util.Properties; + +import org.apache.zeppelin.interpreter.InterpreterException; +import org.junit.jupiter.api.Test; + +/** + * Tests for FileInterpreter CommandArgs parsing functionality. + */ +class FileInterpreterTest { + + /** + * Mock FileInterpreter for testing CommandArgs functionality + */ + private static class TestFileInterpreter extends FileInterpreter { + public TestFileInterpreter(Properties property) { + super(property); + } + + @Override + public String listAll(String path) throws InterpreterException { + return ""; + } + + @Override + public boolean isDirectory(String path) { + return true; + } + + @Override + public void open() { + } + + @Override + public void close() { + } + + // Expose CommandArgs for testing + public CommandArgs getCommandArgs(String cmd) { + CommandArgs args = new CommandArgs(cmd); + args.parseArgs(); + return args; + } + } + + @Test + void testCommandArgsParsing() { + TestFileInterpreter interpreter = new TestFileInterpreter(new Properties()); + + // Test simple command without flags + FileInterpreter.CommandArgs args1 = interpreter.getCommandArgs("ls"); + assertEquals("ls", args1.command); + assertEquals(0, args1.args.size()); + assertEquals(0, args1.flags.size()); + + // Test command with path + FileInterpreter.CommandArgs args2 = interpreter.getCommandArgs("ls /user"); + assertEquals("ls", args2.command); + assertEquals(1, args2.args.size()); + assertEquals("/user", args2.args.get(0)); + assertEquals(0, args2.flags.size()); + + // Test command with single flag + FileInterpreter.CommandArgs args3 = interpreter.getCommandArgs("ls -l"); + assertEquals("ls", args3.command); + assertEquals(0, args3.args.size()); + assertEquals(1, args3.flags.size()); + assertTrue(args3.flags.contains('l')); + assertFalse(args3.flags.contains('-')); + + // Test command with multiple flags + FileInterpreter.CommandArgs args4 = interpreter.getCommandArgs("ls -la"); + assertEquals("ls", args4.command); + assertEquals(0, args4.args.size()); + assertEquals(2, args4.flags.size()); + assertTrue(args4.flags.contains('l')); + assertTrue(args4.flags.contains('a')); + assertFalse(args4.flags.contains('-')); + + // Test command with flags and path + FileInterpreter.CommandArgs args5 = interpreter.getCommandArgs("ls -l /user"); + assertEquals("ls", args5.command); + assertEquals(1, args5.args.size()); + assertEquals("/user", args5.args.get(0)); + assertEquals(1, args5.flags.size()); + assertTrue(args5.flags.contains('l')); + assertFalse(args5.flags.contains('-')); + + // Test command with separate flags + FileInterpreter.CommandArgs args6 = interpreter.getCommandArgs("ls -l -h /user"); + assertEquals("ls", args6.command); + assertEquals(1, args6.args.size()); + assertEquals("/user", args6.args.get(0)); + assertEquals(2, args6.flags.size()); + assertTrue(args6.flags.contains('l')); + assertTrue(args6.flags.contains('h')); + assertFalse(args6.flags.contains('-')); + + // Test command with combined flags + FileInterpreter.CommandArgs args7 = interpreter.getCommandArgs("ls -lah /user"); + assertEquals("ls", args7.command); + assertEquals(1, args7.args.size()); + assertEquals("/user", args7.args.get(0)); + assertEquals(3, args7.flags.size()); + assertTrue(args7.flags.contains('l')); + assertTrue(args7.flags.contains('a')); + assertTrue(args7.flags.contains('h')); + assertFalse(args7.flags.contains('-')); + } + + @Test + void testCommandArgsWithDashNotInFlags() { + TestFileInterpreter interpreter = new TestFileInterpreter(new Properties()); + + // Test that dash character is not included in flags after fix + FileInterpreter.CommandArgs args = interpreter.getCommandArgs("ls -l"); + + // Verify dash is not in flags + assertFalse(args.flags.contains('-'), + "Dash character should not be included in flags"); + + // Verify correct flag is included + assertTrue(args.flags.contains('l'), + "Flag 'l' should be included"); + + // Verify flag count + assertEquals(1, args.flags.size(), + "Should only have one flag character"); + } + + @Test + void testEmptyFlags() { + TestFileInterpreter interpreter = new TestFileInterpreter(new Properties()); + + // Test empty flag (just dash) + FileInterpreter.CommandArgs args = interpreter.getCommandArgs("ls -"); + assertEquals("ls", args.command); + assertEquals(0, args.args.size()); + assertEquals(0, args.flags.size()); + } + + @Test + void testComplexCommand() { + TestFileInterpreter interpreter = new TestFileInterpreter(new Properties()); + + // Test complex command with multiple flags and arguments + FileInterpreter.CommandArgs args = interpreter.getCommandArgs("ls -la -h /user /tmp"); + assertEquals("ls", args.command); + assertEquals(2, args.args.size()); + assertEquals("/user", args.args.get(0)); + assertEquals("/tmp", args.args.get(1)); + assertEquals(3, args.flags.size()); + assertTrue(args.flags.contains('l')); + assertTrue(args.flags.contains('a')); + assertTrue(args.flags.contains('h')); + assertFalse(args.flags.contains('-')); + } +} \ No newline at end of file From ab2e9c25a0ac9fd437f3e2bbf51b0517131d9471 Mon Sep 17 00:00:00 2001 From: renechoi <115696395+renechoi@users.noreply.github.com> Date: Fri, 25 Jul 2025 00:11:12 +0900 Subject: [PATCH 2/3] [ZEPPELIN-6258] Improve Process resource management in SparkInterpreterLauncher - Added stdout consumption to prevent buffer overflow - Implemented process timeout (30 seconds) with forceful termination - Added exit value validation and logging - Ensured process cleanup in finally block - Maintained backward compatibility This prevents process hangs and ensures proper resource cleanup. --- .../launcher/SparkInterpreterLauncher.java | 64 +++++++++++++++---- .../SparkInterpreterLauncherTest.java | 26 ++++++++ 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java index 33b3e4ba62d..c52e85e192f 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java @@ -20,6 +20,7 @@ import java.io.FileInputStream; import java.io.FilenameFilter; import java.io.IOException; +import java.io.InputStream; import java.net.URL; import java.net.URLClassLoader; import java.nio.charset.StandardCharsets; @@ -28,6 +29,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -35,6 +37,7 @@ import java.util.stream.StreamSupport; import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.lang3.StringUtils; import org.apache.zeppelin.conf.ZeppelinConfiguration; import org.apache.zeppelin.interpreter.recovery.RecoveryStorage; @@ -272,22 +275,57 @@ private String detectSparkScalaVersion(String sparkHome, Map env builder.environment().putAll(env); File processOutputFile = File.createTempFile("zeppelin-spark", ".out"); builder.redirectError(processOutputFile); + Process process = builder.start(); - process.waitFor(); - String processOutput = IOUtils.toString(new FileInputStream(processOutputFile), StandardCharsets.UTF_8); - Pattern pattern = Pattern.compile(".*Using Scala version (.*),.*"); - Matcher matcher = pattern.matcher(processOutput); - if (matcher.find()) { - String scalaVersion = matcher.group(1); - if (scalaVersion.startsWith("2.12")) { - return "2.12"; - } else if (scalaVersion.startsWith("2.13")) { - return "2.13"; + try { + // Consume stdout to prevent buffer overflow + try (InputStream stdout = process.getInputStream()) { + IOUtils.copy(stdout, NullOutputStream.NULL_OUTPUT_STREAM); + } + + // Wait with timeout (30 seconds) + boolean finished = process.waitFor(30, TimeUnit.SECONDS); + if (!finished) { + process.destroyForcibly(); + throw new IOException("spark-submit --version command timed out after 30 seconds"); + } + + // Check exit value + int exitValue = process.exitValue(); + if (exitValue != 0) { + LOGGER.warn("spark-submit --version exited with non-zero code: {}", exitValue); + } + + // Read the output from the file + String processOutput; + try (FileInputStream in = new FileInputStream(processOutputFile)) { + processOutput = IOUtils.toString(in, StandardCharsets.UTF_8); + } + + Pattern pattern = Pattern.compile(".*Using Scala version (.*),.*"); + Matcher matcher = pattern.matcher(processOutput); + if (matcher.find()) { + String scalaVersion = matcher.group(1); + if (scalaVersion.startsWith("2.12")) { + return "2.12"; + } else if (scalaVersion.startsWith("2.13")) { + return "2.13"; + } else { + throw new Exception("Unsupported scala version: " + scalaVersion); + } } else { - throw new Exception("Unsupported scala version: " + scalaVersion); + LOGGER.debug("Could not detect Scala version from spark-submit output, falling back to jar inspection"); + return detectSparkScalaVersionByReplClass(sparkHome); + } + } finally { + // Ensure process is cleaned up + if (process.isAlive()) { + process.destroyForcibly(); + } + // Clean up temporary file + if (!processOutputFile.delete() && processOutputFile.exists()) { + LOGGER.warn("Failed to delete temporary file: {}", processOutputFile.getAbsolutePath()); } - } else { - return detectSparkScalaVersionByReplClass(sparkHome); } } diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java index c1dc975b3c5..9378d823698 100644 --- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java @@ -31,10 +31,13 @@ import java.io.File; import java.io.IOException; +import java.lang.reflect.Method; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -325,4 +328,27 @@ void testYarnClusterMode_3() throws IOException { } FileUtils.deleteDirectory(localRepoPath.toFile()); } + + @Test + void testDetectSparkScalaVersionProcessManagement() throws Exception { + SparkInterpreterLauncher launcher = new SparkInterpreterLauncher(zConf, null); + + // Use reflection to access private method + Method detectSparkScalaVersionMethod = SparkInterpreterLauncher.class.getDeclaredMethod( + "detectSparkScalaVersion", String.class, Map.class); + detectSparkScalaVersionMethod.setAccessible(true); + + Map env = new HashMap<>(); + + // Call the method multiple times to ensure processes are properly cleaned + for (int i = 0; i < 3; i++) { + String scalaVersion = (String) detectSparkScalaVersionMethod.invoke(launcher, sparkHome, env); + assertTrue(scalaVersion.equals("2.12") || scalaVersion.equals("2.13"), + "Expected scala version 2.12 or 2.13 but got: " + scalaVersion); + } + + // Note: We cannot easily test that processes are destroyed or that stdout is consumed + // without mocking ProcessBuilder, which would require significant refactoring. + // The test above ensures the method still works correctly with the new implementation. + } } From 50bb5c7db30065a90f719ddf9cc4279c10a25af2 Mon Sep 17 00:00:00 2001 From: renechoi <115696395+renechoi@users.noreply.github.com> Date: Mon, 4 Aug 2025 21:51:01 +0900 Subject: [PATCH 3/3] [ZEPPELIN-6268] Fix resource leaks and add null checks for getResourceAsStream --- .../zeppelin/alluxio/AlluxioInterpreterTest.java | 11 +++++++---- .../apache/zeppelin/python/IPythonInterpreter.java | 9 +++++++++ .../apache/zeppelin/python/PythonInterpreter.java | 13 ++++++++++--- .../java/org/apache/zeppelin/r/IRInterpreter.java | 3 +++ .../main/java/org/apache/zeppelin/r/ZeppelinR.java | 3 +++ 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java index 00405113a28..6c0f29df57b 100644 --- a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java +++ b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java @@ -224,9 +224,12 @@ void mkdirTest() throws IOException, AlluxioException { private void fileReadTest(String fileName, int size) throws IOException { File testFile = new File(PathUtils.concatPath(mLocalAlluxioCluster.getAlluxioHome(), fileName)); FileInputStream fis = new FileInputStream(testFile); - byte[] read = new byte[size]; - fis.read(read); - fis.close(); - assertTrue(BufferUtils.equalIncreasingByteArray(size, read)); + try { + byte[] read = new byte[size]; + fis.read(read); + assertTrue(BufferUtils.equalIncreasingByteArray(size, read)); + } finally { + fis.close(); + } } } diff --git a/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java b/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java index d40cef49b81..858fbf6c227 100644 --- a/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java +++ b/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java @@ -134,6 +134,9 @@ private void setupJVMGateway(String gatewayHost, int gatewayPort) throws IOExcep private void initPythonInterpreter(String gatewayHost, int gatewayPort) throws IOException { InputStream input = getClass().getClassLoader().getResourceAsStream("python/zeppelin_ipython.py"); + if (input == null) { + throw new IOException("Cannot find resource: python/zeppelin_ipython.py"); + } List lines = IOUtils.readLines(input, StandardCharsets.UTF_8); ExecuteResponse response = jupyterKernelClient.block_execute(ExecuteRequest.newBuilder() .setCode(StringUtils.join(lines, System.lineSeparator()) @@ -145,6 +148,9 @@ private void initPythonInterpreter(String gatewayHost, int gatewayPort) throws I input = getClass().getClassLoader().getResourceAsStream("python/zeppelin_context.py"); + if (input == null) { + throw new IOException("Cannot find resource: python/zeppelin_context.py"); + } lines = IOUtils.readLines(input, StandardCharsets.UTF_8); response = jupyterKernelClient.block_execute(ExecuteRequest.newBuilder() .setCode(StringUtils.join(lines, System.lineSeparator())).build()); @@ -161,6 +167,9 @@ private void initPythonInterpreter(String gatewayHost, int gatewayPort) throws I if (additionalPythonInitFile != null) { input = getClass().getClassLoader().getResourceAsStream(additionalPythonInitFile); + if (input == null) { + throw new IOException("Cannot find resource: " + additionalPythonInitFile); + } lines = IOUtils.readLines(input, StandardCharsets.UTF_8); response = jupyterKernelClient.block_execute(ExecuteRequest.newBuilder() .setCode(StringUtils.join(lines, System.lineSeparator()) diff --git a/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java b/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java index 46dbf8baddd..320fe69c1ff 100644 --- a/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java +++ b/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java @@ -43,6 +43,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.util.LinkedList; import java.util.List; @@ -196,15 +197,21 @@ protected boolean useIPython() { private void copyResourceToPythonWorkDir(String srcResourceName, String dstFileName) throws IOException { FileOutputStream out = null; + InputStream in = null; try { out = new FileOutputStream(pythonWorkDir.getAbsoluteFile() + "/" + dstFileName); - IOUtils.copy( - getClass().getClassLoader().getResourceAsStream(srcResourceName), - out); + in = getClass().getClassLoader().getResourceAsStream(srcResourceName); + if (in == null) { + throw new IOException("Cannot find resource: " + srcResourceName); + } + IOUtils.copy(in, out); } finally { if (out != null) { out.close(); } + if (in != null) { + in.close(); + } } } diff --git a/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java b/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java index 6407459354e..8536a3b3691 100644 --- a/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java +++ b/rlang/src/main/java/org/apache/zeppelin/r/IRInterpreter.java @@ -129,6 +129,9 @@ protected void initIRKernel() throws IOException, InterpreterException { String timeout = getProperty("spark.r.backendConnectionTimeout", "6000"); InputStream input = getClass().getClassLoader().getResourceAsStream("R/zeppelin_isparkr.R"); + if (input == null) { + throw new IOException("Cannot find resource: R/zeppelin_isparkr.R"); + } String code = IOUtils.toString(input, StandardCharsets.UTF_8) .replace("${Port}", sparkRBackend.port() + "") .replace("${version}", sparkVersion() + "") diff --git a/rlang/src/main/java/org/apache/zeppelin/r/ZeppelinR.java b/rlang/src/main/java/org/apache/zeppelin/r/ZeppelinR.java index 436da005106..3c03ecc8290 100644 --- a/rlang/src/main/java/org/apache/zeppelin/r/ZeppelinR.java +++ b/rlang/src/main/java/org/apache/zeppelin/r/ZeppelinR.java @@ -97,6 +97,9 @@ public void open() throws IOException, InterpreterException { try { out = new FileOutputStream(scriptFile); in = getClass().getClassLoader().getResourceAsStream("R/zeppelin_sparkr.R"); + if (in == null) { + throw new InterpreterException("Cannot find resource: R/zeppelin_sparkr.R"); + } IOUtils.copy(in, out); } catch (IOException e) { throw new InterpreterException(e);