Uploaded image for project: 'CDAP'
  1. CDAP
  2. CDAP-13281

Batch data pipelines with Spark engine fail on Spark 2.2

    XMLWordPrintableJSON

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 4.3.4
    • Fix Version/s: 5.0.0
    • Component/s: Pipelines
    • Labels:
      None
    • Release Notes:
      Fixed an issue where Spark 2.2 batch pipelines with HDFS sinks would fail with a delegation token error.
    • Rank:
      1|i00buf:

      Description

      On Spark 2.2, the pipelines fail with the following exception:

      2018-04-04 22:01:26,547 - ERROR [Driver:o.a.s.d.y.ApplicationMaster@91] - User class threw exception: org.apache.hadoop.ipc.RemoteException(java.io.IOException): Delegation Token can be issued only with kerberos or web authentication
      	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getDelegationToken(FSNamesystem.java:7498)
      	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getDelegationToken(NameNodeRpcServer.java:548)
      	at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.getDelegationToken(AuthorizationProviderProxyClientProtocol.java:663)
      	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:981)
      	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
      	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
      	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
      	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2220)
      	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2216)
      	at java.security.AccessController.doPrivileged(Native Method)
      	at javax.security.auth.Subject.doAs(Subject.java:422)
      	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
      	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2214)
      
      org.apache.hadoop.ipc.RemoteException: Delegation Token can be issued only with kerberos or web authentication
      	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getDelegationToken(FSNamesystem.java:7498)
      	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getDelegationToken(NameNodeRpcServer.java:548)
      	at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.getDelegationToken(AuthorizationProviderProxyClientProtocol.java:663)
      	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:981)
      	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
      	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
      	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
      	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2220)
      	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2216)
      	at java.security.AccessController.doPrivileged(Native Method)
      	at javax.security.auth.Subject.doAs(Subject.java:422)
      	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
      	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2214)
      
      	at org.apache.hadoop.ipc.Client.call(Client.java:1504) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.ipc.Client.call(Client.java:1441) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:230) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at com.sun.proxy.$Proxy35.getDelegationToken(Unknown Source) ~[na:na]
      	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getDelegationToken(ClientNamenodeProtocolTranslatorPB.java:928) ~[hadoop-hdfs-2.6.0-cdh5.11.2.jar:na]
      	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_161]
      	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_161]
      	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_161]
      	at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_161]
      	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:256) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:104) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at com.sun.proxy.$Proxy36.getDelegationToken(Unknown Source) ~[na:na]
      	at org.apache.hadoop.hdfs.DFSClient.getDelegationToken(DFSClient.java:1082) ~[hadoop-hdfs-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.hdfs.DistributedFileSystem.getDelegationToken(DistributedFileSystem.java:1499) ~[hadoop-hdfs-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.fs.FileSystem.collectDelegationTokens(FileSystem.java:546) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.fs.FileSystem.addDelegationTokens(FileSystem.java:524) ~[hadoop-common-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.hdfs.DistributedFileSystem.addDelegationTokens(DistributedFileSystem.java:2283) ~[hadoop-hdfs-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.mapreduce.security.TokenCache.obtainTokensForNamenodesInternal(TokenCache.java:140) ~[hadoop-mapreduce-client-core-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.mapreduce.security.TokenCache.obtainTokensForNamenodesInternal(TokenCache.java:100) ~[hadoop-mapreduce-client-core-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.mapreduce.security.TokenCache.obtainTokensForNamenodes(TokenCache.java:80) ~[hadoop-mapreduce-client-core-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:142) ~[hadoop-mapreduce-client-core-2.6.0-cdh5.11.2.jar:na]
      	at org.apache.spark.internal.io.SparkHadoopMapReduceWriter$.write(SparkHadoopMapReduceWriter.scala:76) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1085) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1084) ~[spark-core_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      	at co.cask.cdap.app.runtime.spark.DefaultSparkExecutionContext.saveAsNewAPIHadoopDataset(DefaultSparkExecutionContext.scala:39) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.AbstractSparkExecutionContext$$anonfun$saveAsDataset$1.apply(AbstractSparkExecutionContext.scala:353) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.AbstractSparkExecutionContext$$anonfun$saveAsDataset$1.apply(AbstractSparkExecutionContext.scala:344) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.AbstractSparkExecutionContext$$anon$7.run(AbstractSparkExecutionContext.scala:447) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SparkTransactional.execute(SparkTransactional.java:206) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.AbstractSparkExecutionContext.saveAsDataset(AbstractSparkExecutionContext.scala:438) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.AbstractSparkExecutionContext.saveAsDataset(AbstractSparkExecutionContext.scala:344) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SerializableSparkExecutionContext.saveAsDataset(SerializableSparkExecutionContext.scala:70) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.DefaultJavaSparkExecutionContext.saveAsDataset(DefaultJavaSparkExecutionContext.scala:210) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.DefaultJavaSparkExecutionContext.saveAsDataset(DefaultJavaSparkExecutionContext.scala:202) ~[na:na]
      	at co.cask.cdap.etl.spark.batch.SparkBatchSinkFactory.writeFromRDD(SparkBatchSinkFactory.java:103) ~[hydrator-spark-core2_2.11-4.3.4.jar:na]
      	at co.cask.cdap.etl.spark.batch.RDDCollection.store(RDDCollection.java:160) ~[hydrator-spark-core2_2.11-4.3.4.jar:na]
      	at co.cask.cdap.etl.spark.SparkPipelineRunner.runPipeline(SparkPipelineRunner.java:181) ~[hydrator-spark-core2_2.11-4.3.4.jar:na]
      	at co.cask.cdap.etl.spark.batch.BatchSparkPipelineDriver.run(BatchSparkPipelineDriver.java:151) ~[hydrator-spark-core2_2.11-4.3.4.jar:na]
      	at co.cask.cdap.api.Transactionals$2.run(Transactionals.java:72) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SparkTransactional$2.run(SparkTransactional.java:231) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SparkTransactional.execute(SparkTransactional.java:206) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SparkTransactional.execute(SparkTransactional.java:136) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.AbstractSparkExecutionContext.execute(AbstractSparkExecutionContext.scala:206) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SerializableSparkExecutionContext.execute(SerializableSparkExecutionContext.scala:62) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.DefaultJavaSparkExecutionContext.execute(DefaultJavaSparkExecutionContext.scala:96) ~[na:na]
      	at co.cask.cdap.api.Transactionals.execute(Transactionals.java:69) ~[na:na]
      	at co.cask.cdap.etl.spark.batch.BatchSparkPipelineDriver.run(BatchSparkPipelineDriver.java:118) ~[hydrator-spark-core2_2.11-4.3.4.jar:na]
      	at co.cask.cdap.app.runtime.spark.SparkMainWrapper$.main(SparkMainWrapper.scala:82) ~[na:na]
      	at co.cask.cdap.app.runtime.spark.SparkMainWrapper.main(SparkMainWrapper.scala) ~[na:na]
      	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_161]
      	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_161]
      	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_161]
      	at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_161]
      	at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$3.run(ApplicationMaster.scala:686) ~[spark-yarn_2.11-2.2.0.cloudera2.jar:2.2.0.cloudera2]
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                poorna Poorna Chandra
                Reporter:
                poorna Poorna Chandra
              • Votes:
                0 Vote for this issue
                Watchers:
                1 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: