tom|www.baidu.com?page=1|2019-10-28 13:40:20
jim|www.sohu.com?page=1|2019-10-28 13:40:25
susan|www.zhihu.com?page=1|2019-10-28 13:40:28
tom|www.baidu.com?page=2|2019-10-28 13:40:32
tom|www.baidu.com?page=3|2019-10-28 13:40:36
susan|www.zhihu.com?page=2|2019-10-28 13:40:40
jim|www.sohu.com?page=2|2019-10-28 13:40:50
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
object Need4 {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setMaster("local[1]").setAppName(this.getClass.getSimpleName)
val sc = new SparkContext(conf)
val sQLContext = new SQLContext(sc)
import sQLContext.implicits._
val sorceFile = sc.textFile("E:\\五道口大数据\\吉祥spark开始\\20200204\\sparkSQL第一天需求\\需求4\\pageaccess.txt")
val df = sorceFile.map(line => {
val split = line.split("[|]")
// val pvtime = split(2).split(":")(2)
(split(0), split(1),split(2))
}).toDF("name", "url", "time")
df.createTempView("user")
sQLContext.sql("select name,url,time ,lead(time,1,0) over (partition by name order by time asc) endtime," +
"UNIX_TIMESTAMP(lead(time,1,0) over(partition by name order by time),'yyyy-MM-dd HH:mm:ss')- UNIX_TIMESTAMP(time,'yyyy-MM-dd HH:mm:ss') period " +
"from user " ).show()
//.coalesce(1).write.csv("D:\\djz\\20200204\\a")
/**
* select user,wz,nvl(sum(period),0) totol_peroid from(select user,dl,UNIX_TIMESTAMP(lead(dl) over(partition by user order by dl),'yyyy-MM-dd HH:mm:ss')- UNIX_TIMESTAMP(dl,'yyyy-MM-dd HH:mm:ss') period,wz from dop) a group by wz,user")
*/
//sQLContext.sql("select name,url,nvl(sum(time)) total_time from (select name,time,UNIX_TIMESTAMP(lead(time) over(partition by name order by time),'yyyy-MM-dd HH:mm:ss')- UNIX_TIMESTAMP(time,'yyyy-MM-dd HH:mm:ss') period,url from user").show()
sc.stop()
}
}