Requirement: compute the page-view count for each province and save the result to a file.
(Revision: skip the file; write the result into MySQL instead.)
access.log
First convert each IP address to its decimal (long) form: ip -> longIp -> table t_access.
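A quick worked example of the conversion (using the ip2Long helper defined in the full program below):

ip2Long("1.2.3.4")
// = 1*256^3 + 2*256^2 + 3*256 + 4
// = 16909060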
ip.txt
Extract three fields, startIP, endIP, and province -> table t_ip.
select province, count(1)
from t_access
join t_ip on longIp between startIP and endIP
group by province
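Before running the full job, the range join can be sanity-checked with a couple of made-up rows (all table names and values below are illustrative only; the same sQLContext and implicits import as in the program are assumed):

// Two hypothetical hits and one hypothetical IP range.
val access = Seq(16909060L, 16909100L).toDF("longIp")
val ranges = Seq((16909056L, 16909311L, "Beijing")).toDF("startIP", "endIP", "province")
access.createTempView("t_access_demo")
ranges.createTempView("t_ip_demo")
sQLContext.sql(
  """
    |select province, count(1) total
    |from t_access_demo join t_ip_demo on longIp between startIP and endIP
    |group by province
  """.stripMargin).show()
// Expected: one row, (Beijing, 2), since both hits fall inside the range.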
import java.util.Properties
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
object Need1 {
  // Convert a dotted-quad IP string to a decimal long by shifting
  // in one octet (8 bits) at a time.
  def ip2Long(ip: String): Long = {
    val fragments = ip.split("[.]")
    var ipNum = 0L
    for (i <- 0 until fragments.length) {
      ipNum = fragments(i).toLong | ipNum << 8L
    }
    ipNum
  }

  def main(args: Array[String]): Unit = {
    // App name and master are assumed for a local run.
    val conf = new SparkConf().setAppName("Need1").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sQLContext = new SQLContext(sc)
    import sQLContext.implicits._

    // access.log: |-separated, the IP is the second field.
    // (Path assumed to sit next to ip.txt below.)
    val sourceFile1: RDD[String] = sc.textFile("E:\\20200204\\sparkSQL第一天需求\\需求1\\access.log")
    val df1: DataFrame = sourceFile1.map(line => {
      val splits = line.split("[|]")
      val strIp = splits(1)
      ip2Long(strIp)
    }).toDF("longIp")
    df1.createTempView("t_access")
    // ip.txt: |-separated; indexes 2 and 3 hold the numeric start/end
    // of an IP range, index 6 holds the province.
    val sourceFile2: RDD[String] = sc.textFile("E:\\20200204\\sparkSQL第一天需求\\需求1\\ip.txt")
    val df2: DataFrame = sourceFile2.map(line => {
      val splits = line.split("[|]")
      (splits(2).toLong, splits(3).toLong, splits(6))
    }).toDF("startIP", "endIP", "province")
    df2.createTempView("t_ip")
    // Range (non-equi) join: a hit matches when its longIp falls
    // inside [startIP, endIP].
    val result = sQLContext.sql(
      """
        |select province, count(province) total
        |from t_access join t_ip on longIp between startIP and endIP
        |group by province
        |order by total desc
      """.stripMargin)
val url:String="jdbc:mysql://localhost:3306/stu?characterEncoding=utf-8&serverTimezone=Asia/Shanghai"
val table:String="access"
val conn = new Properties()
conn.setProperty("user","root")
conn.setProperty("password","123")
conn.setProperty("driver","com.mysql.jdbc.Driver")
result.write.mode(SaveMode.Overwrite).jdbc(url,table,conn)
sc.stop()
}
}
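For the original "save to a file" requirement (before the MySQL revision), a minimal variant would replace the jdbc write above; the output path here is illustrative:

// Save the aggregated rows as comma-separated text instead of JDBC.
result.rdd.map(_.mkString(","))
  .saveAsTextFile("E:\\20200204\\sparkSQL第一天需求\\需求1\\result")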