系统的目标有两个:
一是行业分类;
二是每个行业的情感值计算;
①因为是基金推荐项目,分析了国民经济分类和同花顺,最终采用同花顺的66个行业分类为标准。
②行业分类依据词,词库从百度词库、搜狗细胞词库手工分析下载而来,得到的是未解析的格式,
我这里用‘深蓝词库转换’这个工具将其转换成对应66个行业的66个txt,然后再用Scala代码解析导入到MySQL中,并在MySQL中以words为索引建好表。
建表语句如下:
-- ============================================================================
-- Schema and analysis queries for the fund-industry sentiment system.
-- NOTE(review): the column name `innserSessionid` (in titlesplit/connect) is a
-- misspelling of `innerSessionId`, but it is referenced consistently by every
-- query and by the Scala loader, so it is preserved to keep callers working.
-- ============================================================================

-- Cross-check segmented words against the category dictionary, nouns only.
-- NOTE(review): `category` as created below has no `username` column; this
-- query presumably predates a rename to `words` — verify before running.
SELECT w.`innersessionId`,
       w.`words`,
       c.`username`,
       c.`category`
FROM `words2` w
JOIN category c ON w.`words` = c.`username`
WHERE w.`sensi` = 'n';

-- Create `category`: preliminary classification result storage.
CREATE TABLE `category` (
  `id`       INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  `editdate` CHAR(20)  NOT NULL DEFAULT '20170622',
  `number`   CHAR(100) NOT NULL DEFAULT 'spell',
  `category` CHAR(100) NOT NULL DEFAULT '综合',
  `words`    CHAR(100) NOT NULL DEFAULT 'name',
  PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
ALTER TABLE `category` ADD INDEX (`words`);

-- Create `category2`: deduplicated classification results (same layout).
CREATE TABLE `category2` (
  `id`       INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  `editdate` CHAR(20)  NOT NULL DEFAULT '20170622',
  `number`   CHAR(100) NOT NULL DEFAULT 'spell',
  `category` CHAR(100) NOT NULL DEFAULT '综合',
  `words`    CHAR(100) NOT NULL DEFAULT 'name',
  PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
-- FIX: the original repeated `ALTER TABLE category` here; the index belongs
-- on the freshly created `category2`.
ALTER TABLE `category2` ADD INDEX (`words`);

-- Fill the deduplicated classification result table.
-- FIX: the original selected `DATE` and `username`, neither of which exists
-- in `category`; `editdate`/`words` match the DDL above — confirm intent.
INSERT INTO category2 (editdate, number, category, words)
SELECT DISTINCT editdate, number, category, words
FROM category;

-- Create the raw source-data table (all fields kept as VARCHAR as originally
-- designed; timestamps are stored as strings by the upstream loader).
CREATE TABLE title (
  id              INT(12) UNSIGNED NOT NULL AUTO_INCREMENT,
  bizNo           VARCHAR(100),
  bizType         VARCHAR(100),
  bizTypeEnum     VARCHAR(100),
  channelEnum     VARCHAR(100),
  channelId       VARCHAR(100),
  channelType     VARCHAR(100),
  collectTime     VARCHAR(100),
  hostIp          VARCHAR(100),
  hostName        VARCHAR(100),
  innerSessionId  VARCHAR(100),
  logNo           VARCHAR(100),
  merchantNo      VARCHAR(100),
  modelNo         VARCHAR(100),
  operateType     VARCHAR(100),
  operateTypeEnum VARCHAR(100),
  PROCESS         VARCHAR(100),
  productNo       VARCHAR(100),
  protocolVersion VARCHAR(100),
  receiveTime     VARCHAR(100),
  sessionId       VARCHAR(100),
  sourcetitle     VARCHAR(100),
  systemCode      VARCHAR(100),
  taskType        VARCHAR(100),
  TIME            VARCHAR(100),
  title           VARCHAR(200),
  PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;

-- NOTE(review): a stray Scala fragment was pasted into the script here; kept
-- as a comment so the file remains parseable as SQL:
--   val pp = value.map(p => { val v0 = p.getString(0).toInt
--     val v1 = p.getInt(1).toString  val v2 = p.getString(2)
--     val v3 = p.getString(3)  (v0,v1,v2,v3) })

-- Create `titlesplit`: one row per segmented word of a title.
CREATE TABLE titlesplit (
  id              INT(12) UNSIGNED NOT NULL AUTO_INCREMENT,
  innserSessionid VARCHAR(50),
  times           VARCHAR(50),
  channelType     VARCHAR(50),
  sourcetitle     VARCHAR(500),
  title           VARCHAR(500),
  words           VARCHAR(500),
  characters      VARCHAR(150),
  refer           VARCHAR(150),
  role            VARCHAR(150),
  PRIMARY KEY (id)
) DEFAULT CHARSET = utf8;
ALTER TABLE `titlesplit` ADD INDEX (`words`);

-- Sentiment-word extreme-value table.
CREATE TABLE layer (
  id    INT(12) UNSIGNED NOT NULL AUTO_INCREMENT,
  words VARCHAR(500),
  value DECIMAL(10, 7),
  PRIMARY KEY (id)
) DEFAULT CHARSET = utf8;
ALTER TABLE `layer` ADD INDEX (`words`);

-- Look up the sentiment value for each segmented keyword.
SELECT l.id, l.`words`, t.`innserSessionid`, t.`times`, t.`id`,
       t.`words`, t.`refer`, t.`role`, l.`VALUE`
FROM layer l
JOIN titlesplit t ON l.`words` = t.`words`;

-- Create the combined result table.
CREATE TABLE `connect` (
  `id`              INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  `innserSessionid` VARCHAR(50),
  `sourcetitle`     VARCHAR(500),
  `times`           VARCHAR(50),
  `words`           VARCHAR(500),
  `characters`      VARCHAR(150),
  `refer`           VARCHAR(150),
  `role`            VARCHAR(150),
  `category`        CHAR(100),
  `number`          CHAR(100),
  `VALUE`           DECIMAL(10, 7),
  PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
ALTER TABLE `connect` ADD INDEX (`words`);

-- Join segmented words to category and sentiment value in one pass.
SELECT titlesplit.`innserSessionid`, titlesplit.`sourcetitle`,
       titlesplit.`times`, titlesplit.`words`, titlesplit.`refer`,
       titlesplit.`role`, category.`category`, category.`number`,
       layer.`VALUE`
FROM titlesplit
LEFT JOIN category ON titlesplit.`words` = category.`words`
LEFT JOIN layer ON titlesplit.`words` = layer.`words`;

-- Second sentiment lexicon (CSenticNet-derived).
CREATE TABLE `CsenticNetlayer2` (
  `id`         INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  `words`      VARCHAR(250),
  `attitude`   VARCHAR(150),
  `expression` VARCHAR(150),
  `extreme`    VARCHAR(500),
  `VALUE`      DECIMAL(10, 7),
  PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
-- FIX: the original re-indexed `connect` here; the index belongs on the
-- freshly created `CsenticNetlayer2`.
ALTER TABLE `CsenticNetlayer2` ADD INDEX (`words`);

-- 划分主题逻辑 (topic-extraction logic)
-- Step 1: extreme-value sentiment words cannot become topic words, so keep
-- nouns that appear in neither sentiment lexicon.
SELECT t.`innserSessionid`, t.`words`, l.`words`, l2.`words`,
       c.`category`, t.`characters`
FROM titlesplit t
JOIN category c ON t.`words` = c.`words`
LEFT JOIN layer l ON t.`words` = l.`words`
LEFT JOIN CsenticNetlayer2 l2 ON t.`words` = l2.`words`
WHERE l.`words` IS NULL
  AND l2.`words` IS NULL
  AND t.`characters` = 'n';

-- Step 2: drop place names (POS tag 'ns').
SELECT t.`words`, t.`characters`, t.`refer`, t.`role`
FROM `titlesplit` t
WHERE t.`characters` = 'ns';

-- Step 3: POS filtering. -- b j n nh ni nz ws -- 25325 387
-- NOTE(review): the comment lists 'n' but the predicate below omits it, as
-- in the original — confirm whether 'n' should be included.
SELECT DISTINCT t.`sourcetitle`, t.`characters`, t.`words`
FROM titlesplit t
WHERE t.`characters` IN ('b', 'j', 'nh', 'ni', 'nz', 'ws');

-- FIX: the original mixed OR and AND without parentheses, so the length
-- filter only applied to the 'ws' branch; IN (...) restores the intent.
SELECT MIN(c.`id`), t.`sourcetitle`, t.`characters`, t.`words`, c.`category`
FROM titlesplit t
JOIN category c ON t.`words` = c.`words`
WHERE t.`characters` IN ('b', 'j', 'nh', 'ni', 'nz', 'ws')
  AND LENGTH(t.`sourcetitle`) > 16
GROUP BY t.`sourcetitle`, t.`characters`, t.`words`;

-- Step 4: select subjects (dependency role SBV) with noun-like POS.
SELECT t.`innserSessionid`, t.`times`, t.`sourcetitle`, t.words,
       t.`characters`, t.`refer`, t.`role`
FROM `titlesplit` t
WHERE (t.`characters` = 'n' OR t.`characters` = 'ni')
  AND role = 'SBV';

-- Step 5: same selection, joined to the category dictionary.
SELECT t.`innserSessionid`, t.`times`, t.`sourcetitle`, t.words,
       t.`characters`, t.`refer`, t.`role`, c.`id`, c.`category`
FROM `titlesplit` t
JOIN category c ON t.`words` = c.`words`
WHERE (t.`characters` = 'n' OR t.`characters` = 'ni')
  AND role = 'SBV';

-- 行业划分 (industry split) — kept commented out as in the original:
-- SELECT t.`innserSessionid`, t.`id`, c.`category`
-- FROM categorys c JOIN titlesplit t ON c.`words` = t.`words`
-- WHERE c.`pos` LIKE 'n%'

-- layer 词性划分 1613: titles whose summed `layer` sentiment is non-null.
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
  SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) sm
  FROM `titlesplit` t
  LEFT JOIN layer l ON t.`words` = l.words
  GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t
WHERE t.sm IS NOT NULL;

-- layer 词性划分 1820: same aggregation without the NULL filter.
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
  SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) sm
  FROM `titlesplit` t
  LEFT JOIN layer l ON t.`words` = l.words
  GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t;

-- CsenticNetlayer2 词性划分 1170: non-null sums against the second lexicon.
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
  SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) sm
  FROM `titlesplit` t
  LEFT JOIN CsenticNetlayer2 l ON t.`words` = l.words
  GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t
WHERE t.sm IS NOT NULL;

-- CsenticNetlayer2 词性划分 1820: same aggregation without the NULL filter.
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
  SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) sm
  FROM `titlesplit` t
  LEFT JOIN CsenticNetlayer2 l ON t.`words` = l.words
  GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t;

-- 关联: both sentiment sums side by side for each title.
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm1, t.sm2
FROM (
  SELECT t.`innserSessionid`, t.`sourcetitle`,
         SUM(l.`VALUE`) sm1, SUM(l2.`VALUE`) sm2
  FROM `titlesplit` t
  LEFT JOIN CsenticNetlayer2 l ON t.`words` = l.words
  LEFT JOIN layer l2 ON t.`words` = l2.`words`
  GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t;

-- Ad-hoc inspection queries.
SELECT DISTINCT t.`innserSessionid`, t.`words`, s.`classification`, s.`value`
FROM `titlesplit` t
JOIN `sentiment` s ON s.`words` = t.`words`;

SELECT DISTINCT t.`innserSessionid` FROM `titlesplit` t;

SELECT * FROM `sentiment` s WHERE s.`words` = '坦诚';

SELECT * FROM `titlesplit` t
WHERE t.`innserSessionid` = '6D238824ECD84D1C9BF42B2920E3FE9C';
将66个行业词库导入到MySQL中的Scala代码如下:
import java.sql.{DriverManager, ResultSet}import org.apache.spark.SparkContextimport org.apache.spark.SparkConfobject category { val rl= "jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456&useUnicode=true&characterEncoding=utf8&autoReconnect=true&failOverReadOnly=false" classOf[com.mysql.jdbc.Driver] val conn = DriverManager.getConnection(rl) def main(args: Array[String]) { val conf = new SparkConf().setMaster("local").setAppName("test") val sc = new SparkContext(conf) val sqlContext = new org.apache.spark.sql.SQLContext(sc) // val role = "jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456&useUnicode=true&characterEncoding=utf8&autoReconnect=true&failOverReadOnly=false" val log881101 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881101.种植业与林业.txt") val format = new java.text.SimpleDateFormat("yyyyMMdd") val date = format.format(new java.util.Date().getTime()).toString val rowRDD881101= log881101.map(line => (line.split(" ").head.trim(), line.split(" ").head.trim(), line.split(" ").head.trim(), line.split(" ").last.trim)) val s881101 = rowRDD881101.map(line => (date, "881101", "种植业与林业", line._4)) import sqlContext.implicits._ val s2881101= s881101.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881101= sqlContext.sql("select distinct editdate,number,category,words from category") val p881101= value881101.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881101.foreach(p => { val v0=p._1 val v1=p._2 val v2=p._3 val v3=p._4 insert(v0,v1,v2,v3) }) val log881102 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881102.养殖业.txt") val rowRDDlog881102=log881102.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881102=rowRDDlog881102.map(line=>(date,"881102","养殖业",line._4)) import 
sqlContext.implicits._ val s2881102= slog881102.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881102= sqlContext.sql("select distinct editdate,number,category,words from category") val p881102= value881102.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881102.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881103 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881103.农产品加工.txt") val rowRDDlog881103=log881103.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881103=rowRDDlog881103.map(line=>(date,"881103","农产品加工",line._4)) import sqlContext.implicits._ val s2881103= slog881103.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881103= sqlContext.sql("select distinct editdate,number,category,words from category") val p881103= value881103.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881103.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881104 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881104.农业服务.txt") val rowRDDlog881104=log881104.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881104=rowRDDlog881104.map(line=>(date,"881104","农业服务",line._4)) import sqlContext.implicits._ val s2881104= slog881104.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881104= sqlContext.sql("select distinct editdate,number,category,words from category") val p881104= value881104.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881104.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881105 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881105.煤炭开采.txt") val rowRDDlog881105=log881105.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881105=rowRDDlog881105.map(line=>(date,"881105","煤炭开采",line._4)) import sqlContext.implicits._ val s2881105= slog881105.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881105= sqlContext.sql("select distinct editdate,number,category,words from category") val p881105= value881105.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881105.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881106 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881106.石油矿业开采.txt") val rowRDDlog881106=log881106.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881106=rowRDDlog881106.map(line=>(date,"881106","石油矿业开采",line._4)) import sqlContext.implicits._ val s2881106= slog881106.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881106= sqlContext.sql("select distinct editdate,number,category,words from category") val p881106= value881106.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881106.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881107 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881107.采掘服务.txt") val rowRDDlog881107=log881107.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881107=rowRDDlog881107.map(line=>(date,"881107","采掘服务",line._4)) import sqlContext.implicits._ val s2881107= slog881107.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881107= sqlContext.sql("select distinct editdate,number,category,words from category") val p881107= value881107.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881107.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881108 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881108.基础化学.txt") val rowRDDlog881108=log881108.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881108=rowRDDlog881108.map(line=>(date,"881108","基础化学",line._4)) import sqlContext.implicits._ val s2881108= slog881108.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881108= sqlContext.sql("select distinct editdate,number,category,words from category") val p881108= value881108.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881108.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881109 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881109.化学制品.txt") val rowRDDlog881109=log881109.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881109=rowRDDlog881109.map(line=>(date,"881109","化学制品",line._4)) import 
sqlContext.implicits._ val s2881109= slog881109.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881109= sqlContext.sql("select distinct editdate,number,category,words from category") val p881109= value881109.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881109.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881110 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881110.化工合成材料.txt") val rowRDDlog881110=log881110.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881110=rowRDDlog881110.map(line=>(date,"881110","化工合成材料",line._4)) import sqlContext.implicits._ val s2881110= slog881110.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881110= sqlContext.sql("select distinct editdate,number,category,words from category") val p881110= value881110.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881110.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881111 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881111.化工新材料.txt") val rowRDDlog881111=log881111.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881111=rowRDDlog881111.map(line=>(date,"881111","化工新材料",line._4)) import sqlContext.implicits._ val s2881111= slog881111.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881111= sqlContext.sql("select distinct editdate,number,category,words from category") val p881111= value881111.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881111.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881112 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881112.钢铁.txt") val rowRDDlog881112=log881112.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881112=rowRDDlog881112.map(line=>(date,"881112","钢铁",line._4)) import sqlContext.implicits._ val s2881112= slog881112.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881112= sqlContext.sql("select distinct editdate,number,category,words from category") val p881112= value881112.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881112.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881113 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881113.有色冶炼加工.txt") val rowRDDlog881113=log881113.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881113=rowRDDlog881113.map(line=>(date,"881113","有色冶炼加工",line._4)) import sqlContext.implicits._ val s2881113= slog881113.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881113= sqlContext.sql("select distinct editdate,number,category,words from category") val p881113= value881113.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881113.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881114 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881114.新材料.txt") val rowRDDlog881114=log881114.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881114=rowRDDlog881114.map(line=>(date,"881114","新材料",line._4)) import sqlContext.implicits._ val s2881114= slog881114.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881114= sqlContext.sql("select distinct editdate,number,category,words from category") val p881114= value881114.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881114.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881115 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881115.建筑材料.txt") val rowRDDlog881115=log881115.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881115=rowRDDlog881115.map(line=>(date,"881115","建筑材料",line._4)) import sqlContext.implicits._ val s2881115= slog881115.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881115= sqlContext.sql("select distinct editdate,number,category,words from category") val p881115= value881115.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881115.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881116 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881116.建筑装饰.txt") val rowRDDlog881116=log881116.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881116=rowRDDlog881116.map(line=>(date,"881116","建筑材料",line._4)) import 
sqlContext.implicits._ val s2881116= slog881116.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881116= sqlContext.sql("select distinct editdate,number,category,words from category") val p881116= value881116.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881116.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881117 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881117.通用设备.txt") val rowRDDlog881117=log881117.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881117=rowRDDlog881117.map(line=>(date,"881117","通用设备",line._4)) import sqlContext.implicits._ val s2881117= slog881117.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881117= sqlContext.sql("select distinct editdate,number,category,words from category") val p881117= value881117.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881117.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881118 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881118.专用设备.txt") val rowRDDlog881118=log881118.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881118=rowRDDlog881118.map(line=>(date,"881118","专用设备",line._4)) import sqlContext.implicits._ val s2881118= slog881118.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881118= sqlContext.sql("select distinct editdate,number,category,words from category") val p881118= value881118.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881118.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881119 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881119.仪器仪表.txt") val rowRDDlog881119=log881119.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881119=rowRDDlog881119.map(line=>(date,"881119","仪器仪表",line._4)) import sqlContext.implicits._ val s2881119= slog881119.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881119= sqlContext.sql("select distinct editdate,number,category,words from category") val p881119= value881119.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881119.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881120 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881120.电气设备.txt") val rowRDDlog881120=log881120.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881120=rowRDDlog881120.map(line=>(date,"881120","电气设备",line._4)) import sqlContext.implicits._ val s2881120= slog881120.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881120= sqlContext.sql("select distinct editdate,number,category,words from category") val p881120= value881120.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881120.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881121 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881121.半导体及元件.txt") val rowRDDlog881121=log881121.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881121=rowRDDlog881121.map(line=>(date,"881121","半导体及元件",line._4)) import sqlContext.implicits._ val s2881121= slog881121.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881121= sqlContext.sql("select distinct editdate,number,category,words from category") val p881121= value881121.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881121.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881122 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881122.光学光电子.txt") val rowRDDlog881122=log881122.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881122=rowRDDlog881122.map(line=>(date,"881122","光学光电子",line._4)) import sqlContext.implicits._ val s2881122= slog881122.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881122= sqlContext.sql("select distinct editdate,number,category,words from category") val p881122= value881122.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881122.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881123 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881123.其他电子.txt") val rowRDDlog881123=log881123.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881123=rowRDDlog881123.map(line=>(date,"881123","其他电子",line._4)) 
import sqlContext.implicits._ val s2881123= slog881123.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881123= sqlContext.sql("select distinct editdate,number,category,words from category") val p881123= value881123.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881123.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881124 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881124.电子制造.txt") val rowRDDlog881124=log881124.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881124=rowRDDlog881124.map(line=>(date,"881124","电子制造",line._4)) import sqlContext.implicits._ val s2881124= slog881124.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881124= sqlContext.sql("select distinct editdate,number,category,words from category") val p881124= value881124.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881124.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881125 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881125.汽车整车.txt") val rowRDDlog881125=log881125.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881125=rowRDDlog881125.map(line=>(date,"881125","汽车整车",line._4)) import sqlContext.implicits._ val s2881125= slog881125.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881125= sqlContext.sql("select distinct editdate,number,category,words from category") val p881125= value881125.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881125.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881126 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881126.汽车零部件.txt") val rowRDDlog881126=log881126.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881126=rowRDDlog881126.map(line=>(date,"881126","汽车零部件",line._4)) import sqlContext.implicits._ val s2881126= slog881126.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881126= sqlContext.sql("select distinct editdate,number,category,words from category") val p881126= value881126.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881126.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881127 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881127.非汽车交运.txt") val rowRDDlog881127=log881127.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881127=rowRDDlog881127.map(line=>(date,"881127","非汽车交运",line._4)) import sqlContext.implicits._ val s2881127= slog881127.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881127= sqlContext.sql("select distinct editdate,number,category,words from category") val p881127= value881127.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881127.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881128 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881128.交运设备服务.txt") // completes the dangling `val log881128 =` on the previous line; textFile is lazy, so nothing is read and the value is simply unused

// Every industry dictionary file, as (industry code, industry name).
// The file layout on HDFS is "<code>.<name>.txt", one entry per line,
// where the last whitespace-separated token is the dictionary word.
// This single table replaces 39 copy-pasted load/insert stanzas.
val industries = Seq(
  ("881128", "交运设备服务"), ("881129", "通信设备"),   ("881130", "计算机设备"),
  ("881131", "白色家电"),     ("881132", "视听器材"),   ("881133", "饮料制造"),
  ("881134", "食品加工制造"), ("881135", "纺织制造"),   ("881136", "服装家纺"),
  ("881137", "造纸"),         ("881138", "包装印刷"),   ("881139", "家用轻工"),
  ("881140", "化学制药"),     ("881141", "中药"),       ("881142", "生物制品"),
  ("881143", "医药商业"),     ("881144", "医疗器械服务"), ("881145", "电力"),
  ("881146", "燃气水务"),     ("881147", "环保工程"),   ("881148", "港口航运"),
  ("881149", "公路铁路运输"), ("881150", "公交"),       ("881151", "机场航运"),
  ("881152", "物流"),         ("881153", "房地产开发"), ("881154", "园区开发"),
  ("881155", "银行"),         ("881156", "保险及其他"), ("881157", "证券"),
  ("881158", "零售"),         ("881159", "贸易"),       ("881160", "景点及旅游"),
  ("881161", "酒店及餐饮"),   ("881162", "通信服务"),   ("881163", "计算机应用"),
  ("881164", "传媒"),         ("881165", "综合"),       ("881166", "国防军工")
)

// Load each dictionary, keep only the word column, and insert one
// (editdate, number, category, word) row per distinct word.
//
// Notes on the rewrite:
//  * The original split each line four times and kept only the last token;
//    we split once.
//  * The original re-registered the same temp table "category" per file and
//    ran `select distinct editdate,number,category,words`; since editdate,
//    number and category are constants within one file, that is exactly a
//    per-file DISTINCT on the word column, done here with RDD.distinct().
//  * The original called the JDBC `insert` inside RDD.foreach, i.e. on
//    executors where the driver-side `conn` is not available. The deduped
//    word lists are small, so we collect() and insert on the driver.
industries.foreach { case (number, category) =>
  val words = sc
    .textFile(s"hdfs://192.168.0.211:9000/user/hadoop/emotion/$number.$category.txt")
    .map(line => line.split(" ").last.trim)
    .distinct()

  // `date` is the edit date established earlier in this driver program.
  words.collect().foreach(word => insert(date, number, category, word))
}

// Second pass: deduplicate the raw `category` table into `category2`.
// NOTE(review): credentials are embedded in the JDBC URL — move them to
// configuration before this leaves a trusted environment.
val categoryDf =
  sqlContext.jdbc("jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456", "category")
categoryDf.registerTempTable("job")
sqlContext
  .sql("select distinct editdate,number,category,words from job")
  .collect() // small dimension table; bring rows to the driver where `conn` lives
  .foreach(r => insert2(r.getString(0), r.getString(1), r.getString(2), r.getString(3)))

conn.close()
}

/**
 * Insert one dictionary row into `category`.
 *
 * @param value0 edit date (editdate)
 * @param value1 industry code (number)
 * @param value2 industry name (category)
 * @param value3 dictionary word (words)
 */
def insert(value0: String, value1: String, value2: String, value3: String): Unit =
  insertRow("category", value0, value1, value2, value3)

/**
 * Insert one deduplicated row into `category2`.
 * Same column contract as [[insert]], different target table.
 */
def insert2(value0: String, value1: String, value2: String, value3: String): Unit =
  insertRow("category2", value0, value1, value2, value3)

/**
 * Shared JDBC insert for the category tables. The table name is one of the
 * two fixed constants above — never user input — so interpolating it into
 * the SQL text is safe; all row values go through bind parameters.
 * Exceptions are logged and swallowed, matching the original best-effort
 * behavior (one bad row must not abort the whole load).
 */
private def insertRow(table: String, editdate: String, number: String,
                      category: String, words: String): Unit = {
  try {
    val prep = conn.prepareStatement(
      s"INSERT INTO $table(editdate,number,category,words) VALUES (?,?,?,?)")
    try {
      prep.setString(1, editdate)
      prep.setString(2, number)
      prep.setString(3, category)
      prep.setString(4, words)
      prep.executeUpdate
    } finally {
      prep.close() // fix: the original leaked one PreparedStatement per row
    }
  } catch {
    case e: Exception => e.printStackTrace
  }
}
}