Querying Cassandra data using Spark SQL in Scala -


I am trying to query Cassandra data using Spark SQL in Scala.

    import com.datastax.spark.connector._       import org.apache.spark.sparkcontext      import org.apache.spark.sparkcontext._     import org.apache.spark.sparkconf      val conf= new sparkconf(true)     .set("spark.cassandra.connection.host","**.*.**.***")     .set("spark.cassandra.auth.username","****")     .set("spark.cassandra.auth.password","****")        val sc = new sparkcontext(conf)      import org.apache.spark.sql._     val sqlcontext = new sqlcontext(sc)     sqlcontext.sql("select * energydata.demodata")   

and it throws this error:

org.apache.spark.sql.AnalysisException: Table or view not found: energydata.demodata; line 1 pos 14; 'Project [*] +- 'UnresolvedRelation energydata.demodata

at org.apache.spark.sql.catalyst.analysis.package$analysiserrorat.failanalysis (package.scala:42) @ org.apache.spark.sql.catalyst.analysis.checkanalysis$$anonfun$checkanalysis $1.apply(checkanalysis.scala:82) @ org.apache.spark.sql.catalyst.analysis.checkanalysis$$anonfun$checkanalysis $1.apply(checkanalysis.scala:78) @ org.apache.spark.sql.catalyst.trees.treenode.foreachup(treenode.scala:127) @ org.apache.spark.sql.catalyst.trees.treenode$$anonfun$foreachup$1.apply(tre enode.scala:126) @ org.apache.spark.sql.catalyst.trees.treenode$$anonfun$foreachup$1.apply(tre enode.scala:126) @ scala.collection.immutable.list.foreach(list.scala:381) @ org.apache.spark.sql.catalyst.trees.treenode.foreachup(treenode.scala:126) @ org.apache.spark.sql.catalyst.analysis.checkanalysis$class.checkanalysis(ch eckanalysis.scala:78) @ org.apache.spark.sql.catalyst.analysis.analyzer.checkanalysis(analyzer.scal a:91) @ org.apache.spark.sql.execution.queryexecution.assertanalyzed(queryexecution .scala:52) @ org.apache.spark.sql.dataset$.ofrows(dataset.scala:66) @ org.apache.spark.sql.sparksession.sql(sparksession.scala:623) @ org.apache.spark.sql.sqlcontext.sql(sqlcontext.scala:691) ... 54 elided

I want to read the table data without disturbing the Cassandra table. I tried the solution given here, adding a hive-site.xml file to spark/conf. But when I add it to spark/conf, it seems Spark stops working properly.

at org.apache.spark.sql.sparksession$builder$$anonfun$getorcreate$5.appl y(sparksession.scala:938)         @ org.apache.spark.sql.sparksession$builder$$anonfun$getorcreate$5.appl y(sparksession.scala:938)         @ scala.collection.mutable.hashmap$$anonfun$foreach$1.apply(hashmap.sca la:99)         @ scala.collection.mutable.hashmap$$anonfun$foreach$1.apply(hashmap.sca la:99)         @ scala.collection.mutable.hashtable$class.foreachentry(hashtable.scala :230)         @ scala.collection.mutable.hashmap.foreachentry(hashmap.scala:40)         @ scala.collection.mutable.hashmap.foreach(hashmap.scala:99)         @ org.apache.spark.sql.sparksession$builder.getorcreate(sparksession.sc ala:938)         @ org.apache.spark.repl.main$.createsparksession(main.scala:97)         @ $line3.$read$$iw$$iw.<init>(<console>:15)         @ $line3.$read$$iw.<init>(<console>:42)         @ $line3.$read.<init>(<console>:44)         @ $line3.$read$.<init>(<console>:48)         @ $line3.$read$.<clinit>(<console>)         @ $line3.$eval$.$print$lzycompute(<console>:7)         @ $line3.$eval$.$print(<console>:6)         @ $line3.$eval.$print(<console>)         @ sun.reflect.nativemethodaccessorimpl.invoke0(native method)         @ sun.reflect.nativemethodaccessorimpl.invoke(unknown source)         @ sun.reflect.delegatingmethodaccessorimpl.invoke(unknown source)         @ java.lang.reflect.method.invoke(unknown source)         @ scala.tools.nsc.interpreter.imain$readevalprint.call(imain.scala:786)          @ scala.tools.nsc.interpreter.imain$request.loadandrun(imain.scala:1047 )         @ scala.tools.nsc.interpreter.imain$wrappedrequest$$anonfun$loadandrunr eq$1.apply(imain.scala:638)         @ scala.tools.nsc.interpreter.imain$wrappedrequest$$anonfun$loadandrunr eq$1.apply(imain.scala:637)         @ scala.reflect.internal.util.scalaclassloader$class.ascontext(scalacla ssloader.scala:31)         @ scala.reflect.internal.util.abstractfileclassloader.ascontext(abstrac tfileclassloader.scala:19)  
       @ scala.tools.nsc.interpreter.imain$wrappedrequest.loadandrunreq(imain. scala:637)         @ scala.tools.nsc.interpreter.imain.interpret(imain.scala:569)         @ scala.tools.nsc.interpreter.imain.interpret(imain.scala:565)         @ scala.tools.nsc.interpreter.iloop.interpretstartingwith(iloop.scala:8 07)         @ scala.tools.nsc.interpreter.iloop.command(iloop.scala:681)         @ scala.tools.nsc.interpreter.iloop.processline(iloop.scala:395)         @ org.apache.spark.repl.sparkiloop$$anonfun$initializespark$1.apply$mcv $sp(sparkiloop.scala:38)         @ org.apache.spark.repl.sparkiloop$$anonfun$initializespark$1.apply(spa rkiloop.scala:37)         @ org.apache.spark.repl.sparkiloop$$anonfun$initializespark$1.apply(spa rkiloop.scala:37)         @ scala.tools.nsc.interpreter.imain.bequietduring(imain.scala:214)         @ org.apache.spark.repl.sparkiloop.initializespark(sparkiloop.scala:37)          @ org.apache.spark.repl.sparkiloop.loadfiles(sparkiloop.scala:98)         @ scala.tools.nsc.interpreter.iloop$$anonfun$process$1.apply$mcz$sp(ilo op.scala:920)         @ scala.tools.nsc.interpreter.iloop$$anonfun$process$1.apply(iloop.scal a:909)         @ scala.tools.nsc.interpreter.iloop$$anonfun$process$1.apply(iloop.scal a:909)         @ scala.reflect.internal.util.scalaclassloader$.savingcontextloader(sca laclassloader.scala:97)         @ scala.tools.nsc.interpreter.iloop.process(iloop.scala:909)         @ org.apache.spark.repl.main$.domain(main.scala:70)         @ org.apache.spark.repl.main$.main(main.scala:53)         @ org.apache.spark.repl.main.main(main.scala)         @ sun.reflect.nativemethodaccessorimpl.invoke0(native method)         @ sun.reflect.nativemethodaccessorimpl.invoke(unknown source)         @ sun.reflect.delegatingmethodaccessorimpl.invoke(unknown source)         @ java.lang.reflect.method.invoke(unknown source)         @ org.apache.spark.deploy.sparksubmit$.org$apache$spark$deploy$sparksub mit$$runmain(sparksubmit.scala:755)         @ 
org.apache.spark.deploy.sparksubmit$.dorunmain$1(sparksubmit.scala:18 0)         @ org.apache.spark.deploy.sparksubmit$.submit(sparksubmit.scala:205)         @ org.apache.spark.deploy.sparksubmit$.main(sparksubmit.scala:119)         @ org.apache.spark.deploy.sparksubmit.main(sparksubmit.scala) caused by: java.lang.runtimeexception: unable instantiate org.apache.hadoop.h ive.ql.metadata.sessionhivemetastoreclient         @ org.apache.hadoop.hive.metastore.metastoreutils.newinstance(metastore utils.java:1523)         @ org.apache.hadoop.hive.metastore.retryingmetastoreclient.<init>(retry ingmetastoreclient.java:86)         @ org.apache.hadoop.hive.metastore.retryingmetastoreclient.getproxy(ret ryingmetastoreclient.java:132)         @ org.apache.hadoop.hive.metastore.retryingmetastoreclient.getproxy(ret ryingmetastoreclient.java:104)         @ org.apache.hadoop.hive.ql.metadata.hive.createmetastoreclient(hive.ja va:3005)         @ org.apache.hadoop.hive.ql.metadata.hive.getmsc(hive.java:3024)         @ org.apache.hadoop.hive.ql.metadata.hive.getalldatabases(hive.java:123 4)         ... 87 more caused by: java.lang.reflect.invocationtargetexception         @ sun.reflect.nativeconstructoraccessorimpl.newinstance0(native method)          @ sun.reflect.nativeconstructoraccessorimpl.newinstance(unknown source)          @ sun.reflect.delegatingconstructoraccessorimpl.newinstance(unknown sou rce)         @ java.lang.reflect.constructor.newinstance(unknown source)         @ org.apache.hadoop.hive.metastore.metastoreutils.newinstance(metastore utils.java:1521)         ... 
93 more caused by: java.lang.illegalargumentexception: java.net.urisyntaxexception: rela tive path in absolute uri: file:$%7btest.warehouse.dir%7d         @ org.apache.hadoop.fs.path.initialize(path.java:205)         @ org.apache.hadoop.fs.path.<init>(path.java:196)         @ org.apache.hadoop.hive.metastore.warehouse.getdnspath(warehouse.java: 141)         @ org.apache.hadoop.hive.metastore.warehouse.getdnspath(warehouse.java: 146)         @ org.apache.hadoop.hive.metastore.warehouse.getwhroot(warehouse.java:1 59)         @ org.apache.hadoop.hive.metastore.warehouse.getdefaultdatabasepath(war ehouse.java:177)         @ org.apache.hadoop.hive.metastore.hivemetastore$hmshandler.createdefau ltdb_core(hivemetastore.java:600)         @ org.apache.hadoop.hive.metastore.hivemetastore$hmshandler.createdefau ltdb(hivemetastore.java:620)         @ org.apache.hadoop.hive.metastore.hivemetastore$hmshandler.init(hiveme tastore.java:461)         @ org.apache.hadoop.hive.metastore.retryinghmshandler.<init>(retryinghm shandler.java:66)         @ org.apache.hadoop.hive.metastore.retryinghmshandler.getproxy(retrying hmshandler.java:72)         @ org.apache.hadoop.hive.metastore.hivemetastore.newretryinghmshandler( hivemetastore.java:5762)         @ org.apache.hadoop.hive.metastore.hivemetastoreclient.<init>(hivemetas toreclient.java:199)         @ org.apache.hadoop.hive.ql.metadata.sessionhivemetastoreclient.<init>( sessionhivemetastoreclient.java:74)         ... 98 more caused by: java.net.urisyntaxexception: relative path in absolute uri: file:$%7b test.warehouse.dir%7d         @ java.net.uri.checkpath(unknown source)         @ java.net.uri.<init>(unknown source)         @ org.apache.hadoop.fs.path.initialize(path.java:202)         ... 111 more 17/07/26 11:40:06 warn objectstore: failed database default, returning no suchobjectexception java.lang.illegalargumentexception: error while instantiating 'org.apache.spark. 
sql.hive.hivesessionstatebuilder':   @ org.apache.spark.sql.sparksession$.org$apache$spark$sql$sparksession$$insta ntiatesessionstate(sparksession.scala:1053)   @ org.apache.spark.sql.sparksession$$anonfun$sessionstate$2.apply(sparksessio n.scala:130)   @ org.apache.spark.sql.sparksession$$anonfun$sessionstate$2.apply(sparksessio n.scala:130)   @ scala.option.getorelse(option.scala:121)   @ org.apache.spark.sql.sparksession.sessionstate$lzycompute(sparksession.scal a:129)   @ org.apache.spark.sql.sparksession.sessionstate(sparksession.scala:126)   @ org.apache.spark.sql.sparksession$builder$$anonfun$getorcreate$5.apply(spar ksession.scala:938)   @ org.apache.spark.sql.sparksession$builder$$anonfun$getorcreate$5.apply(spar ksession.scala:938)   @ scala.collection.mutable.hashmap$$anonfun$foreach$1.apply(hashmap.scala:99)    @ scala.collection.mutable.hashmap$$anonfun$foreach$1.apply(hashmap.scala:99)    @ scala.collection.mutable.hashtable$class.foreachentry(hashtable.scala:230)   @ scala.collection.mutable.hashmap.foreachentry(hashmap.scala:40)   @ scala.collection.mutable.hashmap.foreach(hashmap.scala:99)   @ org.apache.spark.sql.sparksession$builder.getorcreate(sparksession.scala:93 8)   @ org.apache.spark.repl.main$.createsparksession(main.scala:97)   ... 
47 elided caused by: org.apache.spark.sql.analysisexception: java.lang.runtimeexception: j ava.lang.runtimeexception: unable instantiate org.apache.hadoop.hive.ql.metad ata.sessionhivemetastoreclient;   @ org.apache.spark.sql.hive.hiveexternalcatalog.withclient(hiveexternalcatalo g.scala:106)   @ org.apache.spark.sql.hive.hiveexternalcatalog.databaseexists(hiveexternalca talog.scala:193)   @ org.apache.spark.sql.internal.sharedstate.externalcatalog$lzycompute(shared state.scala:105)   @ org.apache.spark.sql.internal.sharedstate.externalcatalog(sharedstate.scala :93)   @ org.apache.spark.sql.hive.hivesessionstatebuilder.externalcatalog(hivesessi onstatebuilder.scala:39)   @ org.apache.spark.sql.hive.hivesessionstatebuilder.catalog$lzycompute(hivese ssionstatebuilder.scala:54)   @ org.apache.spark.sql.hive.hivesessionstatebuilder.catalog(hivesessionstateb uilder.scala:52)   @ org.apache.spark.sql.hive.hivesessionstatebuilder.catalog(hivesessionstateb uilder.scala:35)   @ org.apache.spark.sql.internal.basesessionstatebuilder.build(basesessionstat ebuilder.scala:289)   @ org.apache.spark.sql.sparksession$.org$apache$spark$sql$sparksession$$insta ntiatesessionstate(sparksession.scala:1050)   ... 
61 more caused by: java.lang.runtimeexception: java.lang.runtimeexception: unable ins tantiate org.apache.hadoop.hive.ql.metadata.sessionhivemetastoreclient   @ org.apache.hadoop.hive.ql.session.sessionstate.start(sessionstate.java:522)    @ org.apache.spark.sql.hive.client.hiveclientimpl.<init>(hiveclientimpl.scala :191)   @ sun.reflect.nativeconstructoraccessorimpl.newinstance0(native method)   @ sun.reflect.nativeconstructoraccessorimpl.newinstance(unknown source)   @ sun.reflect.delegatingconstructoraccessorimpl.newinstance(unknown source)   @ java.lang.reflect.constructor.newinstance(unknown source)   @ org.apache.spark.sql.hive.client.isolatedclientloader.createclient(isolated clientloader.scala:264)   @ org.apache.spark.sql.hive.hiveutils$.newclientformetadata(hiveutils.scala:3 62)   @ org.apache.spark.sql.hive.hiveutils$.newclientformetadata(hiveutils.scala:2 66)   @ org.apache.spark.sql.hive.hiveexternalcatalog.client$lzycompute(hiveexterna lcatalog.scala:66)   @ org.apache.spark.sql.hive.hiveexternalcatalog.client(hiveexternalcatalog.sc ala:65)   @ org.apache.spark.sql.hive.hiveexternalcatalog$$anonfun$databaseexists$1.app ly$mcz$sp(hiveexternalcatalog.scala:194)   @ org.apache.spark.sql.hive.hiveexternalcatalog$$anonfun$databaseexists$1.app ly(hiveexternalcatalog.scala:194)   @ org.apache.spark.sql.hive.hiveexternalcatalog$$anonfun$databaseexists$1.app ly(hiveexternalcatalog.scala:194)   @ org.apache.spark.sql.hive.hiveexternalcatalog.withclient(hiveexternalcatalo g.scala:97)   ... 70 more caused by: java.lang.runtimeexception: unable instantiate org.apache.hadoop.h ive.ql.metadata.sessionhivemetastoreclient   @ org.apache.hadoop.hive.metastore.metastoreutils.newinstance(metastoreutils. 
java:1523)   @ org.apache.hadoop.hive.metastore.retryingmetastoreclient.<init>(retryingmet astoreclient.java:86)   @ org.apache.hadoop.hive.metastore.retryingmetastoreclient.getproxy(retryingm etastoreclient.java:132)   @ org.apache.hadoop.hive.metastore.retryingmetastoreclient.getproxy(retryingm etastoreclient.java:104)   @ org.apache.hadoop.hive.ql.metadata.hive.createmetastoreclient(hive.java:300 5)   @ org.apache.hadoop.hive.ql.metadata.hive.getmsc(hive.java:3024)   @ org.apache.hadoop.hive.ql.session.sessionstate.start(sessionstate.java:503)    ... 84 more caused by: java.lang.reflect.invocationtargetexception: java.lang.illegalargumen texception: java.net.urisyntaxexception: relative path in absolute uri: file:$%7 btest.warehouse.dir%7d   @ sun.reflect.nativeconstructoraccessorimpl.newinstance0(native method)   @ sun.reflect.nativeconstructoraccessorimpl.newinstance(unknown source)   @ sun.reflect.delegatingconstructoraccessorimpl.newinstance(unknown source)   @ java.lang.reflect.constructor.newinstance(unknown source)   @ org.apache.hadoop.hive.metastore.metastoreutils.newinstance(metastoreutils. java:1521)   ... 
90 more caused by: java.lang.illegalargumentexception: java.net.urisyntaxexception: rela tive path in absolute uri: file:$%7btest.warehouse.dir%7d   @ org.apache.hadoop.fs.path.initialize(path.java:205)   @ org.apache.hadoop.fs.path.<init>(path.java:196)   @ org.apache.hadoop.hive.metastore.warehouse.getdnspath(warehouse.java:141)   @ org.apache.hadoop.hive.metastore.warehouse.getdnspath(warehouse.java:146)   @ org.apache.hadoop.hive.metastore.warehouse.getwhroot(warehouse.java:159)   @ org.apache.hadoop.hive.metastore.warehouse.getdefaultdatabasepath(warehouse .java:177)   @ org.apache.hadoop.hive.metastore.hivemetastore$hmshandler.createdefaultdb_c ore(hivemetastore.java:600)   @ org.apache.hadoop.hive.metastore.hivemetastore$hmshandler.createdefaultdb(h ivemetastore.java:620)   @ org.apache.hadoop.hive.metastore.hivemetastore$hmshandler.init(hivemetastor e.java:461)   @ org.apache.hadoop.hive.metastore.retryinghmshandler.<init>(retryinghmshandl er.java:66)   @ org.apache.hadoop.hive.metastore.retryinghmshandler.getproxy(retryinghmshan dler.java:72)   @ org.apache.hadoop.hive.metastore.hivemetastore.newretryinghmshandler(hiveme tastore.java:5762)   @ org.apache.hadoop.hive.metastore.hivemetastoreclient.<init>(hivemetastorecl ient.java:199)   @ org.apache.hadoop.hive.ql.metadata.sessionhivemetastoreclient.<init>(sessio nhivemetastoreclient.java:74)   ... 95 more caused by: java.net.urisyntaxexception: relative path in absolute uri: file:$%7b test.warehouse.dir%7d   @ java.net.uri.checkpath(unknown source)   @ java.net.uri.<init>(unknown source)   @ org.apache.hadoop.fs.path.initialize(path.java:202)   ... 
108 more <console>:14: error: not found: value spark        import spark.implicits._               ^ <console>:14: error: not found: value spark        import spark.sql               ^ welcome       ____              __      / __/__  ___ _____/ /__     _\ \/ _ \/ _ `/ __/  '_/    /___/ .__/\_,_/_/ /_/\_\   version 2.2.0       /_/  using scala version 2.11.8 (java hotspot(tm) 64-bit server vm, java 1.8.0_141) type in expressions have them evaluated. type :help more information.  scala> 

I am using Scala 2.12.2, Java 1.8.0, and Cassandra 3.1.1. Is there another way I can write SQL queries in Scala?

thank you.

From the imports I understand you're using the spark-cassandra-connector. In the version compatibility section they've mentioned that the latest version of the connector supports Scala 2.10 and 2.11, Cassandra 2.1.5*, 2.2 and 3.0, and Spark 2.0 and 2.1.

So I'll suggest downgrading your Scala and Cassandra versions and checking whether it works.

Next, I'll suggest changing the way you're trying to access the tables. DataStax has provided a different API to connect to Cassandra. You may find the relevant documentation here.

For Spark 2.x, you may use:

// Spark 2.x style: build a SparkSession and read the Cassandra table
// through the DataSource API instead of a raw SQL string.
// NOTE: the original snippet was lowercased and would not compile —
// `sparksession`/`appname`/`getorcreate`/`map` must be
// `SparkSession`/`appName`/`getOrCreate`/`Map`.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("casstest")
  .master("local[2]")
  .config("spark.cassandra.connection.host", "**.*.**.***")
  .config("spark.cassandra.auth.username", "****")
  .config("spark.cassandra.auth.password", "****")
  .getOrCreate()

import spark.implicits._

// Load keyspace "test", table "words" as a DataFrame.
val df = spark
  .read
  .format("org.apache.spark.sql.cassandra")
  .options(Map("table" -> "words", "keyspace" -> "test"))
  .load()

Finally, you may call df.show to inspect the data.
Note: the hive-site.xml fix you tried connects Hive to a globally accessible metastore, which is a different data store altogether. So it will not work for Cassandra.

Let me know if this helped. Cheers.


Comments

Popular posts from this blog

node.js - Node js - Trying to send POST request, but it is not loading javascript content -

javascript - Replicate keyboard event with html button -

javascript - Web audio api 5.1 surround example not working in firefox -