03
import scala.io.{BufferedSource, Source}
/**
 * Computes the daily PV and UV of every site from a comma-separated access log.
 *
 *  - PV (page views): how many visit records a site has on a given day.
 *  - UV (unique visitors): how many distinct users visited a site on a given day.
 *
 * Every input line is expected to look like `site1,user1,2018-03-01 02:12:22`.
 */
object PVUVDemo {

  /** Log file that is read when no path is passed on the command line. */
  private val DefaultPath: String =
    "D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\pvuv.txt"

  /** Minimum number of comma-separated fields: site, user, timestamp. */
  private val MinFields: Int = 3

  /** Length of the `yyyy-MM-dd` prefix that is cut out of the timestamp. */
  private val DateLength: Int = 10

  /**
   * Reads the access log and prints the PV and UV of every (date, site) pair.
   *
   * @param args optional; `args(0)` overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val path: String = Option(args).flatMap(_.headOption).getOrElse(DefaultPath)

    // Materialise the lines first so the file handle can be released right away.
    val source: BufferedSource = Source.fromFile(path)
    val lines: List[String] =
      try {
        source.getLines().toList
      } finally {
        source.close()
      }

    // Dirty lines are dropped while parsing, so each line is split only once.
    val events: List[(String, String, String)] = lines.flatMap(line => parseEvent(line))

    // Group the users by (date, site) once; PV and UV are both derived from it.
    val usersByDayAndSite: Map[(String, String), List[String]] =
      events
        .groupBy { case (site, _, date) => (date, site) }
        .map { case (key, group) => (key, group.map(_._2)) }

    // PV: how many times the site was viewed on that day.
    val pv: Map[(String, String), Int] =
      usersByDayAndSite.map { case (key, users) => (key, users.size) }
    // UV: how many different users viewed the site on that day.
    val uv: Map[(String, String), Int] =
      usersByDayAndSite.map { case (key, users) => (key, users.distinct.size) }

    println("============pv================")
    pv.foreach(println)
    println("============uv================")
    uv.foreach(println)
  }

  /**
   * Parses one log line into `(site, user, date)`.
   *
   * @param line raw line such as `site1,user1,2018-03-01 02:12:22`
   * @return `None` for dirty data: fewer than three fields, any empty field,
   *         or a timestamp too short to contain a `yyyy-MM-dd` date
   */
  private def parseEvent(line: String): Option[(String, String, String)] = {
    // limit = -1 keeps trailing empty fields so they are detected as dirty data
    val fields: Array[String] = line.split(",", -1)
    val clean: Boolean =
      fields.length >= MinFields &&
        !fields.exists(_.isEmpty) &&
        fields(2).length >= DateLength
    if (clean) Some((fields(0), fields(1), fields(2).substring(0, DateLength)))
    else None
  }
}
线段重叠案例
data:
1,4
2,5
4,6
2,4
3,6
4,6
1,5
代码:
package com.doit.day03
import scala.io.Source
/**
 * Segment-overlap demo: every input line `start,end` describes a segment that
 * covers the integer points `start` to `end` (both inclusive). The program
 * prints, for each covered point, how many segments cover it.
 */
object LineDemo {

  /** Data file that is read when no path is passed on the command line. */
  private val DefaultPath: String =
    "D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\line.txt"

  /**
   * Reads the segments and prints one `(point, count)` pair per covered point.
   *
   * @param args optional; `args(0)` overrides the default input path
   * @throws IllegalArgumentException if a non-blank line is not of the form `start,end`
   * @throws NumberFormatException if `start` or `end` is not an integer
   */
  def main(args: Array[String]): Unit = {
    val path: String = Option(args).flatMap(_.headOption).getOrElse(DefaultPath)

    // Materialise the lines first so the file handle can be released right away.
    val source = Source.fromFile(path)
    val lines: List[String] =
      try {
        source.getLines().toList
      } finally {
        source.close()
      }

    // Expand every segment into the points it covers; blank lines carry no segment.
    val segments: List[Range.Inclusive] =
      lines.filter(_.trim.nonEmpty).map(line => parseSegment(line))

    // Flatten all segments into one list of points.
    val points: List[Int] = segments.flatten

    // Group identical points; the group size is the number of segments covering the point.
    val coverage: Map[Int, Int] =
      points.groupBy(point => point).map { case (point, hits) => (point, hits.size) }

    coverage.foreach(println)
  }

  /**
   * Parses one `start,end` line into the inclusive range of points it covers.
   * Whitespace around the numbers is ignored; fields after the second are ignored.
   */
  private def parseSegment(line: String): Range.Inclusive =
    line.split(",").map(_.trim) match {
      case Array(start, end, _*) => start.toInt to end.toInt
      case _ =>
        throw new IllegalArgumentException(s"Expected 'start,end' but got: '$line'")
    }
}
|