Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ project.ext.spec = [
],
'scala' : [
'scala_library' : "org.scala-lang:scala-library:$ver.scala",
'scalatest' : "org.scalatest:scalatest_$ver.scala_rt:3.0.0",
'scalatest' : "org.scalatest:scalatest_$ver.scala_rt:3.2.9",
],
'avro' : "org.apache.avro:avro:1.10.2",
"avroUtil": "com.linkedin.avroutil1:helper-all:0.2.100",
Expand Down Expand Up @@ -153,7 +153,7 @@ project.ext.spec = [
"antlr": "org.antlr:antlr4:4.8",
"antlrRuntime": "org.antlr:antlr4-runtime:4.8",
"jsqlparser": "com.github.jsqlparser:jsqlparser:3.1",

"scalaTestPlus": "org.scalatestplus:mockito-3-4_2.12:3.3.0.0-SNAP3",
]
]

Expand Down
1 change: 1 addition & 0 deletions feathr-impl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ dependencies {
testImplementation spec.product.scala.scalatest
testImplementation spec.product.testing
testImplementation spec.product.jdiagnostics
testImplementation spec.product.scalaTestPlus
}

// Since there are cross-calls from Scala to Java, we use joint compiler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ import com.linkedin.feathr.common.exception.{ErrorLabel, FeathrInputDataExceptio
import com.linkedin.feathr.offline.config.location.DataLocation
import com.linkedin.feathr.offline.generation.SparkIOUtils
import com.linkedin.feathr.offline.job.DataSourceUtils.getSchemaFromAvroDataFile
import com.linkedin.feathr.offline.source.dataloader.DataLoaderHandler
import com.linkedin.feathr.offline.util.DelimiterUtils.checkDelimiterOption
import org.apache.avro.Schema
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.mapred.JobConf
Expand All @@ -16,7 +14,9 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
* @param ss the spark session
* @param path input data path
*/
private[offline] class BatchDataLoader(ss: SparkSession, location: DataLocation, dataLoaderHandlers: List[DataLoaderHandler]) extends DataLoader {
private[offline] class BatchDataLoader(val ss: SparkSession,
val location: DataLocation,
val dataLoaderHandlers: List[DataLoaderHandler]) extends DataLoader {

/**
* get the schema of the source. It's only used in the deprecated DataSource.getDataSetAndSchema
Expand Down Expand Up @@ -48,26 +48,23 @@ private[offline] class BatchDataLoader(ss: SparkSession, location: DataLocation,
* @return a dataframe
*/
override def loadDataFrame(): DataFrame = {
loadDataFrame(Map(), new JobConf(ss.sparkContext.hadoopConfiguration))
loadDataFrameWithRetry(Map(), new JobConf(ss.sparkContext.hadoopConfiguration), MAX_DATA_LOAD_RETRY)
}

/**
* load the source data as dataframe.
* @param dataIOParameters extra parameters
* @param jobConf Hadoop JobConf to be passed
* @param retry number of times to retry when data loading fails
* @return a dataframe
*/
def loadDataFrame(dataIOParameters: Map[String, String], jobConf: JobConf): DataFrame = {
def loadDataFrameWithRetry(dataIOParameters: Map[String, String], jobConf: JobConf, retry: Int): DataFrame = {
val sparkConf = ss.sparkContext.getConf
val inputSplitSize = sparkConf.get("spark.feathr.input.split.size", "")
val dataIOParametersWithSplitSize = Map(SparkIOUtils.SPLIT_SIZE -> inputSplitSize) ++ dataIOParameters
val dataPath = location.getPath

log.info(s"Loading ${location} as DataFrame, using parameters ${dataIOParametersWithSplitSize}")

// Get csvDelimiterOption set with spark.feathr.inputFormat.csvOptions.sep and check if it is set properly (Only for CSV and TSV)
val csvDelimiterOption = checkDelimiterOption(ss.sqlContext.getConf("spark.feathr.inputFormat.csvOptions.sep", ","))

try {
import scala.util.control.Breaks._

Expand All @@ -87,12 +84,20 @@ private[offline] class BatchDataLoader(ss: SparkSession, location: DataLocation,
}
df
} catch {
case feathrException: FeathrInputDataException =>
println(feathrException.toString)
throw feathrException // Throwing exception to avoid dataLoaderHandler hook exception from being swallowed.
case e: Throwable => //TODO: Analyze all thrown exceptions, instead of swallowing them all, and reading as a csv
println(e.toString)
ss.read.format("csv").option("header", "true").option("delimiter", csvDelimiterOption).load(dataPath)
case _: Throwable =>
// If data loading from the source failed, retry automatically, as the failure might be due to the data source still being written to.
log.info(s"Loading ${location} failed, retrying for ${retry}-th time..")
if (retry > 0) {
Thread.sleep(DATA_LOAD_WAIT_IN_MS)
loadDataFrameWithRetry(dataIOParameters, jobConf, retry - 1)
} else {
// Throwing exception to avoid dataLoaderHandler hook exception from being swallowed.
throw new FeathrInputDataException(ErrorLabel.FEATHR_USER_ERROR, s"Failed to load ${dataPath} after ${MAX_DATA_LOAD_RETRY} retries.")
}
}
}
// Retry 2 times if data source loading fails
val MAX_DATA_LOAD_RETRY = 2
// Wait for 10 minutes and retry if data source loading fails
val DATA_LOAD_WAIT_IN_MS = 10*60*1000
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
import com.linkedin.feathr.common.types.PrimitiveType;
import java.util.Optional;

import org.scalatest.testng.TestNGSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import static java.util.Collections.*;
import static org.testng.Assert.*;


public class AutoTensorizableTypesTest extends TestNGSuite {
public class AutoTensorizableTypesTest {
private static final TensorType NTV_EQUIVALENT_TENSOR_TYPE = new TensorType(PrimitiveType.FLOAT, singletonList(
PrimitiveDimensionType.STRING));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.feathr.common;

import com.linkedin.feathr.common.tensor.TensorType;
import org.scalatest.testng.TestNGSuite;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand All @@ -11,7 +10,7 @@
* Unit tests for {@link FeatureTypeConfig}
*
*/
public class FeatureTypeConfigTest extends TestNGSuite {
public class FeatureTypeConfigTest {

@DataProvider
public Object[][] FeatureTypeConstructorWithAutoTZTestCases() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.scalatest.testng.TestNGSuite;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
Expand All @@ -21,7 +20,7 @@
import static org.testng.Assert.*;


public class TestFeatureDependencyGraph extends TestNGSuite {
public class TestFeatureDependencyGraph {

/*
Description of test scenario:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@

import nl.jqno.equalsverifier.EqualsVerifier;
import nl.jqno.equalsverifier.Warning;
import org.scalatest.testng.TestNGSuite;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import static java.util.Collections.*;


public class TestFeatureValue extends TestNGSuite {
public class TestFeatureValue {

private static final float DEFAULT_VALUE = FeatureValue.DEFAULT_VALUE;
private static final String EMPTY_TERM = FeatureValue.EMPTY_TERM;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
import java.util.Collections;

import com.linkedin.feathr.common.tensor.TensorType;
import org.scalatest.testng.TestNGSuite;
import org.testng.annotations.Test;

import static org.testng.Assert.*;


public class TestFeatureTypes extends TestNGSuite {
public class TestFeatureTypes {
@Test
public void checkBasicTypes() {
assertEquals(BooleanFeatureType.INSTANCE.getBasicType(), FeatureType.BasicType.BOOLEAN);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
import com.linkedin.feathr.common.tensor.TensorCategory;
import com.linkedin.feathr.common.tensor.TensorType;
import com.linkedin.feathr.common.value.QuinceFeatureTypeMapper;
import org.scalatest.testng.TestNGSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import static java.util.Collections.*;
import static org.testng.Assert.*;


public class TestQuinceFeatureTypeMapper extends TestNGSuite {
public class TestQuinceFeatureTypeMapper {
@DataProvider
public Object[][] quinceTypeMapperCases() {
return new Object[][] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.mvel2.MVEL;
import org.mvel2.ParserConfiguration;
import org.mvel2.ParserContext;
import org.scalatest.testng.TestNGSuite;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
Expand All @@ -17,7 +16,7 @@
/**
* Unit tests for {@link MvelContextUDFs} using expressions
*/
public class MvelUDFExpressionTests extends TestNGSuite {
public class MvelUDFExpressionTests {

private static final ParserConfiguration PARSER_CONFIG = new ParserConfiguration();
private ParserContext parserContext;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import java.util.List;
import java.util.Map;

import org.scalatest.testng.TestNGSuite;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand All @@ -23,7 +22,7 @@
/**
* Unit tests for {@link MvelContextUDFs}
*/
public class TestMvelContextUDFs extends TestNGSuite {
public class TestMvelContextUDFs {
@Test
public void testGetDataType() {
Assert.assertEquals(get_data_type("A"), "java.lang.String");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@
import java.util.Collections;
import java.util.Map;

import org.scalatest.testng.TestNGSuite;
import org.testng.annotations.Test;

import static java.util.Collections.*;
import static org.testng.Assert.*;


public class TestFeatureValueOldAPICompatibility extends TestNGSuite {
public class TestFeatureValueOldAPICompatibility {
@Test
public void basicEqualityChecks() {
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@
import java.util.Arrays;
import java.util.Map;

import org.scalatest.testng.TestNGSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import static java.util.Collections.*;
import static org.testng.Assert.*;


public class TestFeatureValues extends TestNGSuite {
public class TestFeatureValues {
private static final Representable[] TENSOR_STRING_FLOAT_TYPE = new Representable[]{Primitive.STRING, Primitive.FLOAT};
private static final Representable[] TENSOR_INT_FLOAT_TYPE = new Representable[]{Primitive.INT, Primitive.FLOAT};

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.linkedin.feathr.offline;

import org.scalatest.testng.TestNGSuite;
import org.testng.annotations.Test;

import java.util.HashMap;
Expand All @@ -10,7 +9,7 @@
import static org.testng.Assert.assertEquals;


public class TestMvelContext extends TestNGSuite {
public class TestMvelContext {
@Test
public void testCosineSimilarity() {
// Test basic cosine similarity calculation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.mvel2.integration.VariableResolverFactory;
import org.mvel2.integration.impl.MapVariableResolverFactory;
import org.mvel2.optimizers.OptimizerFactory;
import org.scalatest.testng.TestNGSuite;
import org.testng.Assert;
import org.testng.annotations.Test;

Expand All @@ -22,7 +21,7 @@
/**
* Test MVEL expression evaluator
*/
public class TestMvelExpression extends TestNGSuite {
public class TestMvelExpression {
@Test(description = "test mvel expression foo.bar on a map field of GenericRecord, where foo is the map field "
+ "and bar is the target key in the map")
public void testMVELExpressionOnMap() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ abstract class FeathrIntegTest extends TestFeathr {

val generatedDataFolder = "src/integTest/generated"
val mockDataFolder = generatedDataFolder + "/mockData"
val trainingData = "src/test/resources/obs/"
val trainingData = "src/test/resources/obs/obs.csv"
val featureData = mockDataFolder + "/a_feature_data"

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,6 @@ class SlidingWindowAggIntegTest extends FeathrIntegTest {
}


/**
@Test
def testSWACountDistinct(): Unit = {
val featureDefAsString =
Expand Down Expand Up @@ -1080,5 +1079,5 @@ class SlidingWindowAggIntegTest extends FeathrIntegTest {
val dfs = runLocalFeatureJoinForTest(featureJoinAsString, featureDefAsString, "featuresWithFilterObs.avro.json").data

validateRows(dfs.select(keyField, features: _*).collect().sortBy(row => row.getAs[Int](keyField)), expectedRows)
}*/
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ import com.linkedin.feathr.offline.util.FeathrTestUtils
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.SparkSession
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{AfterClass, BeforeClass}

import scala.collection.convert.wrapAll._
import scala.reflect.ClassTag

// abstract class for all feathr tests
abstract class TestFeathr extends TestNGSuite {
abstract class TestFeathr {
protected var ss: SparkSession = _
protected var conf: Configuration = _
protected var feathr: FeathrClient = _
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package com.linkedin.feathr.offline

import com.linkedin.feathr.offline.util.FeathrUtils
import org.scalatest.testng.TestNGSuite

import org.testng.Assert._
import org.testng.annotations.Test

class TestFeathrUtils extends TestNGSuite {
class TestFeathrUtils {
private val TEST_FEATHR_VERSION = "1.2.3"

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import com.linkedin.feathr.offline.anchored.feature.{FeatureAnchor, FeatureAncho
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.mockito.Mockito.when
import org.scalatest.mockito.MockitoSugar.mock
import org.scalatestplus.mockito.MockitoSugar.mock
import org.testng.Assert.assertEquals
import org.testng.annotations.Test

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ import com.jasonclawson.jackson.dataformat.hocon.HoconFactory
import com.linkedin.feathr.common.FeathrJacksonScalaModule
import com.linkedin.feathr.offline.config.location.{Jdbc, LocationUtils, Snowflake}
import com.linkedin.feathr.offline.source.{DataSource, SourceFormatType}
import org.scalatest.FunSuite
import org.scalatest.funsuite.AnyFunSuite

import scala.collection.mutable


class TestDataSourceLoader extends FunSuite {
class TestDataSourceLoader extends AnyFunSuite {
/// Base line test to ensure backward compatibility
test("DataSourceLoader.deserialize BaseLine") {
val configDoc =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package com.linkedin.feathr.offline.config

import com.linkedin.feathr.common.exception.FeathrConfigException
import org.scalatest.testng.TestNGSuite

import org.testng.Assert
import org.testng.annotations.Test

import scala.io.Source

class TestFeatureGroupsGenerator extends TestNGSuite{
class TestFeatureGroupsGenerator{

private val _feathrConfigLoader = FeathrConfigLoader()

Expand Down
Loading