
The driver hangs at DataFrame.rdd in Spark 2.1.0

StanZhai
Hi all,

The driver hangs at DataFrame.rdd in Spark 2.1.0 when the DataFrame (SQL) is complex. Here is a thread dump of my driver:

org.apache.spark.sql.catalyst.expressions.AttributeReference.equals(namedExpressions.scala:230)
org.apache.spark.sql.catalyst.expressions.IsNotNull.equals(nullExpressions.scala:312)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
org.apache.spark.sql.catalyst.expressions.Or.equals(predicates.scala:315)
scala.collection.mutable.FlatHashTable$class.addEntry(FlatHashTable.scala:151)
scala.collection.mutable.HashSet.addEntry(HashSet.scala:40)
scala.collection.mutable.FlatHashTable$class.addElem(FlatHashTable.scala:139)
scala.collection.mutable.HashSet.addElem(HashSet.scala:40)
scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:59)
scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:40)
scala.collection.generic.Growable$$anonfun$$plus$plus$eq$1.apply(Growable.scala:59)
scala.collection.generic.Growable$$anonfun$$plus$plus$eq$1.apply(Growable.scala:59)
scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
scala.collection.mutable.AbstractSet.$plus$plus$eq(Set.scala:46)
scala.collection.mutable.HashSet.clone(HashSet.scala:83)
scala.collection.mutable.HashSet.clone(HashSet.scala:40)
org.apache.spark.sql.catalyst.expressions.ExpressionSet.$plus(ExpressionSet.scala:65)
org.apache.spark.sql.catalyst.expressions.ExpressionSet.$plus(ExpressionSet.scala:50)
scala.collection.SetLike$$anonfun$$plus$plus$1.apply(SetLike.scala:141)
scala.collection.SetLike$$anonfun$$plus$plus$1.apply(SetLike.scala:141)
scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
scala.collection.immutable.HashSet$HashSet1.foreach(HashSet.scala:316)
scala.collection.immutable.HashSet$HashTrieSet.foreach(HashSet.scala:972)
scala.collection.immutable.HashSet$HashTrieSet.foreach(HashSet.scala:972)
scala.collection.immutable.HashSet$HashTrieSet.foreach(HashSet.scala:972)
scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
scala.collection.AbstractTraversable.foldLeft(Traversable.scala:104)
scala.collection.TraversableOnce$class.$div$colon(TraversableOnce.scala:151)
scala.collection.AbstractTraversable.$div$colon(Traversable.scala:104)
scala.collection.SetLike$class.$plus$plus(SetLike.scala:141)
org.apache.spark.sql.catalyst.expressions.ExpressionSet.$plus$plus(ExpressionSet.scala:50)
org.apache.spark.sql.catalyst.plans.logical.UnaryNode$$anonfun$getAliasedConstraints$1.apply(LogicalPlan.scala:300)
org.apache.spark.sql.catalyst.plans.logical.UnaryNode$$anonfun$getAliasedConstraints$1.apply(LogicalPlan.scala:297)
scala.collection.immutable.List.foreach(List.scala:381)
org.apache.spark.sql.catalyst.plans.logical.UnaryNode.getAliasedConstraints(LogicalPlan.scala:297)
org.apache.spark.sql.catalyst.plans.logical.Project.validConstraints(basicLogicalOperators.scala:58)
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints$lzycompute(QueryPlan.scala:187) => holding Monitor(org.apache.spark.sql.catalyst.plans.logical.Join@1365611745})
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints(QueryPlan.scala:187)
org.apache.spark.sql.catalyst.plans.logical.Project.validConstraints(basicLogicalOperators.scala:58)
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints$lzycompute(QueryPlan.scala:187) => holding Monitor(org.apache.spark.sql.catalyst.plans.logical.Join@1365611745})
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints(QueryPlan.scala:187)
org.apache.spark.sql.catalyst.plans.logical.Project.validConstraints(basicLogicalOperators.scala:58)
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints$lzycompute(QueryPlan.scala:187) => holding Monitor(org.apache.spark.sql.catalyst.plans.logical.Join@1365611745})
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints(QueryPlan.scala:187)
org.apache.spark.sql.catalyst.plans.logical.Project.validConstraints(basicLogicalOperators.scala:58)
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints$lzycompute(QueryPlan.scala:187) => holding Monitor(org.apache.spark.sql.catalyst.plans.logical.Join@1365611745})
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints(QueryPlan.scala:187)
org.apache.spark.sql.catalyst.plans.logical.Project.validConstraints(basicLogicalOperators.scala:58)
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints$lzycompute(QueryPlan.scala:187) => holding Monitor(org.apache.spark.sql.catalyst.plans.logical.Join@1365611745})
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints(QueryPlan.scala:187)
org.apache.spark.sql.catalyst.plans.logical.Join.validConstraints(basicLogicalOperators.scala:302)
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints$lzycompute(QueryPlan.scala:187) => holding Monitor(org.apache.spark.sql.catalyst.plans.logical.Join@1365611745})
org.apache.spark.sql.catalyst.plans.QueryPlan.constraints(QueryPlan.scala:187)
org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints$$anonfun$apply$13.applyOrElse(Optimizer.scala:618)
org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints$$anonfun$apply$13.applyOrElse(Optimizer.scala:605)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:332)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:332)
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:331)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$1.apply(TreeNode.scala:202)
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:200)
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$1.apply(TreeNode.scala:202)
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:200)
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$1.apply(TreeNode.scala:202)
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:200)
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$1.apply(TreeNode.scala:202)
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:200)
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:337)
org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:321)
org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints$.apply(Optimizer.scala:605)
org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints$.apply(Optimizer.scala:604)
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
scala.collection.immutable.List.foreach(List.scala:381)
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:73) => holding Monitor(org.apache.spark.sql.execution.QueryExecution@184471747})
org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:73)
org.apache.spark.sql.execution.QueryExecution$$anonfun$toString$2.apply(QueryExecution.scala:230)
org.apache.spark.sql.execution.QueryExecution$$anonfun$toString$2.apply(QueryExecution.scala:230)
org.apache.spark.sql.execution.QueryExecution.stringOrError(QueryExecution.scala:107)
org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:230)
org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2544)
org.apache.spark.sql.Dataset.rdd(Dataset.scala:2544)
...
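
To give a sense of the query shape involved, here is a minimal sketch in Scala (hypothetical — not my actual SQL, and the object, column names, and loop count are invented) of the kind of plan that can make the optimizer spin like this. Each self-referential aliasing select multiplies the aliased constraints that InferFiltersFromConstraints has to track, so optimization time explodes with plan depth:

import org.apache.spark.sql.SparkSession

object ConstraintBlowupSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("constraint-blowup-sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Seed some constraints (IsNotNull(a), a > 0) with a filter.
    var df = Seq((1, 2), (3, 4)).toDF("a", "b").filter($"a" > 0)

    // Each step re-derives both columns from the previous ones, so the
    // set of aliased constraints roughly doubles per iteration.
    for (_ <- 1 to 10) {
      df = df.select(($"a" + $"b").as("a"), ($"a" - $"b").as("b"))
    }

    // DataFrame.rdd forces QueryExecution.optimizedPlan; with enough
    // iterations the driver spins at 100% CPU inside the optimizer.
    println(df.rdd.count())
  }
}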

The CPU usage of the driver stays at 100% (see the attached screenshot: http://apache-spark-developers-list.1001551.n3.nabble.com/attachment/21050/0/25FEF70B%4052873242.80CAAD58).

I didn't see this issue in Spark 1.6.2. What causes it in Spark 2.1.0?

Any help is greatly appreciated!

Best,
Stan

Re: The driver hangs at DataFrame.rdd in Spark 2.1.0

Liang-Chi Hsieh
Hi Stan,

Looks like it is the same issue we are working to solve. Related PRs are:

https://github.com/apache/spark/pull/16998
https://github.com/apache/spark/pull/16785

You can take a look at those PRs and help review them, too. Thanks.
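
In the meantime, if you need a workaround on 2.1.0 before a fix lands, one possibility (a sketch only, not something from those PRs) is to truncate the query lineage with Dataset.checkpoint, which is available since 2.1.0. Checkpointing materializes the intermediate result, so the optimizer only ever sees a shallow plan; the trade-off is the extra job and I/O:

import org.apache.spark.sql.{Dataset, Row, SparkSession}

// Hedged workaround sketch for Spark 2.1.0. buildComplexDf is a
// hypothetical stand-in for however the deeply nested plan is built.
def truncateLineage(spark: SparkSession,
                    buildComplexDf: SparkSession => Dataset[Row]): Dataset[Row] = {
  spark.sparkContext.setCheckpointDir("/tmp/spark-checkpoints")
  // checkpoint() is eager by default: it runs a job, persists the data,
  // and returns a Dataset whose logical plan is just a scan of the
  // checkpointed RDD, so subsequent optimization is cheap.
  buildComplexDf(spark).checkpoint()
}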


Liang-Chi Hsieh | @viirya
Spark Technology Center
http://www.spark.tc/