spark离线分析--本地Spark1.6版本读写hive表

发布时间 2023-11-28 21:12:03作者: 技术虫

1. 搭建好hive环境,并将hive-site.xml文件放到本地工程的resources目录下

2. Java测试代码

  

public class SparkHiveTest{

     public static void main(String[] args){
  
 JavaSparkContext javaSparkContext  = null;
try {
      SparkContext sparkConf =new 
      SparkConf().setAppName("SparkHiveTest").setMaster("local[*]");

    javaSparkContext   = new JavaSparkContext(sparkConf );
     HiveContext hiveContext = new HiveContext(javaSparkContext )
     //查询attack_count表数据,表结构 content ,ctime, content是json字符串,ctime是分区格式,yyyyMMdd
     string sql ="select * from test.attack_count order by ctime desc"
     DataFrame result = hiveContext .sql(sql);

     List<Row> collectAsList = result.coalesec(10).collectAsList();

     result.registerTempTable("AttackCount");
     String curDate = DateUtil.toString(new Date(), "yyyyMMdd");
     hiveContext.sql("insert into table test.attack_count partition(ctime='"+curDate+"') select content from AttackCount limit 1");
      
       }catch(Exception e){
     }finally{
     if(javaSparkContext!=null) {
      javaSparkContext.stop();
     }
  }
}