Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions connect/src/main/protobuf/graphframes.proto
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ message ShortestPaths {
bool use_local_checkpoints = 3;
int32 checkpoint_interval = 4;
optional StorageLevel storage_level = 5;
optional bool is_directed = 6;
}

message StronglyConnectedComponents {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,12 +349,19 @@ object GraphFramesConnectUtils {
pregel.run()
}
case proto.GraphFramesAPI.MethodCase.SHORTEST_PATHS => {
val isDirected = if (apiMessage.getShortestPaths.hasIsDirected) {
apiMessage.getShortestPaths.getIsDirected
} else {
true
}

val spBuilder = graphFrame.shortestPaths
.landmarks(
apiMessage.getShortestPaths.getLandmarksList.asScala.map(parseLongOrStringID).toSeq)
.setAlgorithm(apiMessage.getShortestPaths.getAlgorithm)
.setCheckpointInterval(apiMessage.getShortestPaths.getCheckpointInterval)
.setUseLocalCheckpoints(apiMessage.getShortestPaths.getUseLocalCheckpoints)
.setIsDirected(isDirected)

if (apiMessage.getShortestPaths.hasStorageLevel) {
spBuilder
Expand Down
19 changes: 12 additions & 7 deletions core/src/main/scala/org/graphframes/lib/ShortestPaths.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import org.graphframes.GraphFramesUnreachableException
import org.graphframes.Logging
import org.graphframes.WithAlgorithmChoice
import org.graphframes.WithCheckpointInterval
import org.graphframes.WithDirection
import org.graphframes.WithIntermediateStorageLevel
import org.graphframes.WithLocalCheckpoints

Expand All @@ -59,7 +60,8 @@ class ShortestPaths private[graphframes] (private val graph: GraphFrame)
with WithAlgorithmChoice
with WithCheckpointInterval
with WithLocalCheckpoints
with WithIntermediateStorageLevel {
with WithIntermediateStorageLevel
with WithDirection {
import org.graphframes.lib.ShortestPaths._

private var lmarks: Option[Seq[Any]] = None
Expand All @@ -83,14 +85,15 @@ class ShortestPaths private[graphframes] (private val graph: GraphFrame)
def run(): DataFrame = {
val lmarksChecked = check(lmarks, "landmarks")
val res = algorithm match {
case ALGO_GRAPHX => runInGraphX(graph, lmarksChecked)
case ALGO_GRAPHX => runInGraphX(graph, lmarksChecked, isDirected)
case ALGO_GRAPHFRAMES =>
runInGraphFrames(
graph,
lmarksChecked,
checkpointInterval,
useLocalCheckpoints = useLocalCheckpoints,
intermediateStorageLevel = intermediateStorageLevel)
intermediateStorageLevel = intermediateStorageLevel,
isDirected = isDirected)
case _ => throw new GraphFramesUnreachableException()
}
resultIsPersistent()
Expand All @@ -100,10 +103,13 @@ class ShortestPaths private[graphframes] (private val graph: GraphFrame)

private object ShortestPaths extends Logging {

private def runInGraphX(graph: GraphFrame, landmarks: Seq[Any]): DataFrame = {
private def runInGraphX(
graph: GraphFrame,
landmarks: Seq[Any],
isDirected: Boolean): DataFrame = {
val longIdToLandmark = landmarks.map(l => GraphXConversions.integralId(graph, l) -> l).toMap
val gx = graphx.lib.ShortestPaths
.run(graph.cachedTopologyGraphX, longIdToLandmark.keys.toSeq.sorted)
.run(graph.cachedTopologyGraphX, longIdToLandmark.keys.toSeq.sorted, isDirected)
val g = GraphXConversions.fromGraphX(graph, gx, vertexNames = Seq(DISTANCE_ID))
val distanceCol: Column = if (graph.hasIntegralIdType) {
g.vertices(DISTANCE_ID)
Expand All @@ -126,10 +132,9 @@ private object ShortestPaths extends Logging {
graph: GraphFrame,
landmarks: Seq[Any],
checkpointInterval: Int,
isDirected: Boolean = true,
isDirected: Boolean,
useLocalCheckpoints: Boolean,
intermediateStorageLevel: StorageLevel): DataFrame = {
logWarn("The GraphFrames based implementation is slow and considered experimental!")
val vertexType = graph.vertices.schema(GraphFrame.ID).dataType

// For landmark vertices the initial distance to itself is set to 0
Expand Down
27 changes: 27 additions & 0 deletions core/src/main/scala/org/graphframes/mixins.scala
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,30 @@ private[graphframes] trait WithLocalCheckpoints {
*/
def getUseLocalCheckpoints: Boolean = useLocalCheckpoints
}

/**
* Mixin that lets an algorithm be configured to treat the graph as directed or undirected.
*
* The flag defaults to `true` (directed).
*/
private[graphframes] trait WithDirection {
// Direction flag exposed to the class mixing in this trait; directed unless overridden.
protected var isDirected: Boolean = true

/**
* Sets whether the graph should be considered directed.
*
* @param value
* true to handle the graph as directed, false to handle it as undirected
* @return
* this instance, so that setter calls can be chained
*/
def setIsDirected(value: Boolean): this.type = {
isDirected = value
this
}

/**
* Gets whether the graph is considered directed.
*
* @return
* true if the graph is handled as directed
*/
def getIsDirected: Boolean = isDirected
}
26 changes: 23 additions & 3 deletions core/src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,22 @@ class TestLDBCCases extends SparkFunSuite with GraphFrameTestSparkContext {
GraphFrame(nodes, edges)
}

/**
 * Loads an unweighted graph from a pair of files sharing the given path prefix.
 *
 * Edges are read as-is from `<pathPrefix>.e` (space-delimited `src dst` pairs, no header)
 * and vertices from `<pathPrefix>.v` (one id per line, cast to long).
 *
 * @param pathPrefix
 *   common path of the `.e` and `.v` files, without extension
 * @return
 *   a GraphFrame built from the loaded vertices and edges
 */
private def readDirectedUnweighted(pathPrefix: String): GraphFrame = {
  // Two long columns per edge row: source id followed by destination id.
  val edgeSchema = StructType(Seq(StructField("src", LongType), StructField("dst", LongType)))

  val edgeFrame = spark.read
    .option("delimiter", " ")
    .option("header", "false")
    .schema(edgeSchema)
    .csv(s"${pathPrefix}.e")
    .toDF("src", "dst")

  // The vertex file is plain text; the single text column is renamed and cast to a long id.
  val vertexFrame = spark.read
    .text(s"${pathPrefix}.v")
    .toDF("id")
    .select(col("id").cast(LongType))

  GraphFrame(vertexFrame, edgeFrame)
}

private def readProperties(path: Path): Properties = {
val props = new Properties()
val stream = Files.newInputStream(path)
Expand All @@ -52,7 +68,7 @@ class TestLDBCCases extends SparkFunSuite with GraphFrameTestSparkContext {
props
}

private lazy val ldbcTestBFSUndirected: (GraphFrame, DataFrame, Long) = {
private lazy val ldbcTestBFSDirected: (GraphFrame, DataFrame, Long) = {
LDBCUtils.downloadLDBCIfNotExists(resourcesPath, LDBCUtils.TEST_BFS_UNDIRECTED)
val caseRoot = resourcesPath.resolve(LDBCUtils.TEST_BFS_UNDIRECTED)

Expand All @@ -66,18 +82,22 @@ class TestLDBCCases extends SparkFunSuite with GraphFrameTestSparkContext {
.toDF("id", "distance")
val props = readProperties(caseRoot.resolve(s"${LDBCUtils.TEST_BFS_UNDIRECTED}.properties"))
(
readUndirectedUnweighted(s"${caseRoot.toString}/${LDBCUtils.TEST_BFS_UNDIRECTED}"),
readDirectedUnweighted(s"${caseRoot.toString}/${LDBCUtils.TEST_BFS_UNDIRECTED}"),
expectedDistances,
props.getProperty(s"graph.${LDBCUtils.TEST_BFS_UNDIRECTED}.bfs.source-vertex").toLong)
}

Seq("graphframes", "graphx").foreach { algo =>
test(s"test undirected BFS with LDBC for impl ${algo}") {
val testCase = ldbcTestBFSUndirected
val testCase = ldbcTestBFSDirected
val srcVertex = testCase._3

// This graph is undirected, but in GraphFrames direction exists
// only at the level of algorithms, so it is chosen per algorithm run.
val spResult = testCase._1.shortestPaths
.landmarks(Seq(srcVertex))
.setAlgorithm(algo)
.setIsDirected(false)
.run()
.select(
col(GraphFrame.ID),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,17 @@ object ShortestPaths extends Serializable {
* the graph for which to compute the shortest paths
* @param landmarks
* the list of landmark vertex ids. Shortest paths will be computed to each landmark.
* @param isDirected
* whether edges are followed only in their own direction (default true); if false, each edge
* is traversed in both directions
*
* @return
* a graph where each vertex attribute is a map containing the shortest-path distance to each
* reachable landmark vertex.
*/
def run[VD, ED: ClassTag](graph: Graph[VD, ED], landmarks: Seq[VertexId]): Graph[SPMap, ED] = {
def run[VD, ED: ClassTag](
graph: Graph[VD, ED],
landmarks: Seq[VertexId],
isDirected: Boolean = true): Graph[SPMap, ED] = {
val spGraph = graph.mapVertices { (vid, _) =>
if (landmarks.contains(vid)) makeMap(vid -> 0) else makeMap()
}
Expand All @@ -73,10 +78,29 @@ object ShortestPaths extends Serializable {
addMaps(attr, msg)
}

def sendMessage(edge: EdgeTriplet[SPMap, _]): Iterator[(VertexId, SPMap)] = {
val newAttr = incrementMap(edge.dstAttr)
if (edge.srcAttr != addMaps(newAttr, edge.srcAttr)) Iterator((edge.srcId, newAttr))
else Iterator.empty
val sendMessage: (EdgeTriplet[SPMap, _]) => Iterator[(VertexId, SPMap)] = if (isDirected) {
(edge: EdgeTriplet[SPMap, _]) =>
{
val newAttr = incrementMap(edge.dstAttr)
if (edge.srcAttr != addMaps(newAttr, edge.srcAttr)) Iterator((edge.srcId, newAttr))
else Iterator.empty
}
} else { (edge: EdgeTriplet[SPMap, _]) =>
{
val newDstAttr = incrementMap(edge.dstAttr)
val newSrcAttr = incrementMap(edge.srcAttr)

val srcIter =
if (edge.srcAttr != addMaps(newDstAttr, edge.srcAttr))
Iterator((edge.srcId, newDstAttr))
else Iterator.empty
val dstIter =
if (edge.dstAttr != addMaps(newSrcAttr, edge.dstAttr))
Iterator((edge.dstId, newSrcAttr))
else Iterator.empty

srcIter ++ dstIter
}
}

Pregel(spGraph, initialMessage)(vertexProgram, sendMessage, addMaps)
Expand Down
15 changes: 8 additions & 7 deletions python/graphframes/classic/graphframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,15 @@ def shortestPaths(
use_local_checkpoints: bool,
checkpoint_interval: int,
storage_level: StorageLevel,
is_directed: bool,
) -> DataFrame:
java_sp = self._jvm_graph.shortestPaths()
java_sp.landmarks(landmarks)
java_sp.setAlgorithm(algorithm)
java_sp.setUseLocalCheckpoints(use_local_checkpoints)
java_sp.setCheckpointInterval(checkpoint_interval)
java_sp.setIntermediateStorageLevel(storage_level_to_jvm(storage_level, self._spark))
java_sp.setIsDirected(is_directed)
jdf = java_sp.run()

return DataFrame(jdf, self._spark)
Expand Down Expand Up @@ -343,11 +345,10 @@ def k_core(
use_local_checkpoints: bool,
storage_level: StorageLevel,
) -> DataFrame:
jdf = (
self._jvm_graph.kCore()
.setUseLocalCheckpoints(use_local_checkpoints)
.setCheckpointInterval(checkpoint_interval)
.setIntermediateStorageLevel(storage_level_to_jvm(storage_level, self._spark))
.run()
)
java_kcore = self._jvm_graph.kCore()
java_kcore.setUseLocalCheckpoints(use_local_checkpoints)
java_kcore.setCheckpointInterval(checkpoint_interval)
java_kcore.setIntermediateStorageLevel(storage_level_to_jvm(storage_level, self._spark))
jdf = java_kcore.run()

return DataFrame(jdf, self._spark)
5 changes: 5 additions & 0 deletions python/graphframes/connect/graphframes_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,7 @@ def shortestPaths(
use_local_checkpoints: bool,
checkpoint_interval: int,
storage_level: StorageLevel,
is_directed: bool,
) -> DataFrame:
@final
class ShortestPaths(LogicalPlan):
Expand All @@ -891,6 +892,7 @@ def __init__(
use_local_checkpoints: bool,
checkpoint_interval: int,
storage_level: StorageLevel,
is_directed: bool,
) -> None:
super().__init__(None)
self.v = v
Expand All @@ -900,6 +902,7 @@ def __init__(
self.use_local_checkpoints = use_local_checkpoints
self.checkpoint_interval = checkpoint_interval
self.storage_level = storage_level
self.is_directed = is_directed

@override
def plan(self, session: SparkConnectClient) -> proto.Relation:
Expand All @@ -913,6 +916,7 @@ def plan(self, session: SparkConnectClient) -> proto.Relation:
use_local_checkpoints=self.use_local_checkpoints,
checkpoint_interval=self.checkpoint_interval,
storage_level=storage_level_to_proto(self.storage_level),
is_directed=self.is_directed,
)
)
plan = self._create_proto_relation()
Expand All @@ -928,6 +932,7 @@ def plan(self, session: SparkConnectClient) -> proto.Relation:
use_local_checkpoints,
checkpoint_interval,
storage_level,
is_directed,
),
self._spark,
)
Expand Down
Loading