首页 >> 大全

企业spark案例 —出租车轨迹分析

2023-11-20 大全 26 作者:考证青年

企业 Spark 案例 —— 出租车轨迹分析

文章目录

一、数据清洗

学习目标

1.如何使用 SparkSQL 读取 CSV 文件

2.如何使用正则表达式清洗掉多余字符串。

将出租车轨迹数据规整化,清洗掉多余的字符串,并使用 .show() 打印输出。

清洗掉数据中多余的 $、@ 字符(原文截图中红框标出的部分)。由于这两个字符出现的次数没有规律,所以需要使用正则表达式进行匹配。

清洗后内容如下:

import org.apache.spark.sql.SparkSession

/**
 * Step 1 - data cleaning.
 *
 * Reads the tab-separated taxi trajectory file `/root/data.csv`, removes every
 * `@` and `$` character from each selected column and prints the result with
 * `show()`.
 */
object Step1 {

  /**
   * Removes all `@` and `$` characters from a raw field value.
   *
   * @param raw the column value as read from the CSV; may be null
   * @return the value without `@`/`$`, or null when the input is null
   */
  private def cleanData(raw: String): String =
    // A single character class replaces the two separate replaceAll passes;
    // Option(...) keeps a null cell from raising a NullPointerException.
    Option(raw).map(_.replaceAll("[@$]+", "")).orNull

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("Step1").master("local").getOrCreate()
    try {
      /**********begin**********/
      val frame = spark.read
        .option("header", true)
        .option("delimiter", "\t")
        .csv("/root/data.csv")
      frame.createTempView("data")

      // Expose the cleaner to SQL under the same name the query uses.
      spark.udf.register("cleanData", (x: String) => cleanData(x))

      spark.sql(
        """|select cleanData(TRIP_ID) as TRIP_ID,cleanData(CALL_TYPE) as CALL_TYPE,cleanData(ORIGIN_CALL) as ORIGIN_CALL,
           |cleanData(TAXI_ID) as TAXI_ID,cleanData(ORIGIN_STAND) as ORIGIN_STAND ,cleanData(TIMESTAMP) as TIMESTAMP,
           |cleanData(POLYLINE) as POLYLINE
           |from data""".stripMargin).show()
      /**********end**********/
    } finally {
      // Always release the session, even when reading or the query fails.
      spark.stop()
    }
  }
}

二、数据分析

使用 SparkSQL 完成数据分析

import com.alibaba.fastjson.JSON
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StringType

/**
 * Step 2 - data analysis.
 *
 * Reads the tab-separated file `/root/data2.csv` and, through a chain of
 * temporary views, (1) converts TIMESTAMP to a `yyyy-MM-dd` date, (2) extracts
 * the first and last point of POLYLINE, (3) derives the trip duration from the
 * number of points and (4) counts trips per call type and day.
 */
object Step2 {

  /**
   * Parses a POLYLINE cell into its JSON array of points.
   *
   * @param polyline the raw POLYLINE text; may be null
   * @return the parsed array, or None when the cell is null, parses to null,
   *         or contains no points
   */
  private def polylinePoints(polyline: String): Option[com.alibaba.fastjson.JSONArray] =
    Option(polyline)
      .flatMap(text => Option(JSON.parseArray(text)))
      .filter(points => !points.isEmpty)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("Step2").master("local").getOrCreate()
    try {
      spark.sparkContext.setLogLevel("error")
      /**********begin**********/
      val frame = spark.read
        .option("header", true)
        .option("delimiter", "\t")
        .csv("/root/data2.csv")
      frame.createTempView("data")

      // 1. Convert the unix timestamp into a date string.
      spark.sql("select TRIP_ID,CALL_TYPE,ORIGIN_CALL,TAXI_ID,ORIGIN_STAND,POLYLINE, from_unixtime(TIMESTAMP,'yyyy-MM-dd') as TIME from data")
        .createTempView("data2")
      spark.sql("select * from data2").show()

      // 2. Split POLYLINE into the two fields startLocation and endLocation.
      //    Trips without any point yield null instead of failing the whole job.
      spark.udf.register("startLocation", (x: String) =>
        polylinePoints(x).flatMap(points => Option(points.get(0))).map(_.toString).orNull)
      spark.udf.register("endLocation", (x: String) =>
        polylinePoints(x).flatMap(points => Option(points.get(points.size() - 1))).map(_.toString).orNull)
      spark.sql(
        """|select TRIP_ID,CALL_TYPE,ORIGIN_CALL,TAXI_ID,ORIGIN_STAND,POLYLINE,TIME,startLocation(POLYLINE) as startLocation,endLocation(POLYLINE) as endLocation  from data2""".stripMargin)
        .createTempView("data3")
      spark.sql("select * from data3").show()

      // 3. Compute the trip duration, defined as (number of points - 1) x 15 seconds.
      //    For example, a POLYLINE with 101 points lasts (101 - 1) * 15 = 1500 seconds.
      //    An empty or missing POLYLINE has no defined duration, so the UDF returns
      //    None (SQL null) rather than a negative number.
      spark.udf.register("timeLen", (x: String) =>
        polylinePoints(x).map(points => (points.size() - 1) * 15))
      // data3 already carries startLocation/endLocation, so they are selected as
      // columns here instead of being recomputed from POLYLINE.
      spark.sql(
        """|select TRIP_ID,CALL_TYPE,ORIGIN_CALL,TAXI_ID,ORIGIN_STAND,POLYLINE,TIME,startLocation,endLocation,timeLen(POLYLINE) as  timeLen  from data3""".stripMargin)
        .createTempView("data4")
      spark.sql("select * from data4").show()

      // 4. Count each call type per day, sorted ascending by CALL_TYPE, then TIME.
      spark.sql(
        """|select CALL_TYPE ,TIME,count(1) as num from data4 group by TIME,CALL_TYPE order by CALL_TYPE,TIME""".stripMargin)
        .show()
      /**********end**********/
    } finally {
      // Always release the session, even when a step above fails.
      spark.stop()
    }
  }
}

三、出租车轨迹图表展示

使用 Spring Boot + ECharts 编写一个展示的图表程序:

对此你需要了解可视化分为前后端,也就是我们的MVC设计模式:

M层:

package net.educoder.app.mapper;

import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Select;

import java.util.List;

/**
 * MyBatis mapper with the read-only queries behind the two charts.
 * It reads the tables {@code taxi_trend} and {@code taxi_servicenum}.
 */
@Mapper
public interface MainMapper {

    // Reference query supplied with the exercise.
    /** Returns the {@code _num} values of one taxi type, ordered by {@code _time}. */
    @Select("SELECT _num from taxi_trend WHERE _taxi = #{type} ORDER BY _time")
    List<Integer> findTaxiTrendNumByType(String type);

    /**********begin**********/

    /**
     * Returns the distinct {@code _time} values of {@code taxi_trend}.
     * Ordered by {@code _time} so the labels line up with the values returned by
     * {@link #findTaxiTrendNumByType(String)}, which uses the same ordering;
     * GROUP BY alone does not guarantee any row order.
     */
    @Select("SELECT _time FROM taxi_trend GROUP BY _time ORDER BY _time")
    List<String> findTaxiTrendTime();

    /** Returns the distinct {@code _taxi} values of {@code taxi_trend}. */
    @Select("select _taxi from taxi_trend group by _taxi")
    List<String> findTaxiType();

    /** Returns the distinct {@code _type} values of {@code taxi_servicenum}. */
    @Select("SELECT _type from taxi_servicenum GROUP BY _type")
    List<String> findTaxiPlatform();

    /** Returns the distinct {@code _serviceType} values, ordered by {@code _serviceType}. */
    @Select("SELECT _serviceType FROM taxi_servicenum GROUP BY _serviceType ORDER BY _serviceType")
    List<String> findAllTaxiService();

    /**
     * Returns the {@code _num} values of one platform ({@code _type}), ordered by
     * {@code _serviceType} - the same ordering as {@link #findAllTaxiService()}.
     */
    @Select("SELECT _num FROM taxi_servicenum WHERE _type = #{Platform} order BY _serviceType ")
    List<Integer> findServiceNumByPlatform(String Platform);

    /**********end**********/
}

V层:

index.html

<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title></title>
    <script src="echarts.min.js"></script>
    <script src="jquery-3.1.1.min.js"></script>
</head>
<body>
<!-- Container of the line chart, filled from POST /Line_Chart. -->
<div id="main" style="width: 1000px;height:600px;"></div>
<!-- Container of the radar chart, filled from POST /Radar_Chart. -->
<div id="main2" style="width: 1000px;height:600px;"></div>
<script>
    // Line chart: x-axis labels come from data.timeList, series from data.resultData.
    var myChart = echarts.init(document.getElementById('main'));
    $.ajax({
        /**********begin**********/
        url: "/Line_Chart",
        /**********end**********/
        success: function (data) {
            // Declared locally so the two callbacks do not share an implicit global.
            var lineOption = {
                title: {text: '各出租车平台年使用率'},
                tooltip: {trigger: 'axis'},
                // NOTE(review): legend names are hard-coded; they are assumed to match
                // the series names returned by the server - confirm against the data.
                legend: {data: ['A', 'B', 'C']},
                grid: {left: '3%', right: '4%', bottom: '3%', containLabel: true},
                toolbox: {feature: {saveAsImage: {}}},
                xAxis: {
                    type: 'category',
                    boundaryGap: false,
                    /**********begin**********/
                    data: data.timeList
                    /**********end**********/
                },
                yAxis: {type: 'value'},
                /**********begin**********/
                series: data.resultData
                /**********end**********/
            };
            myChart.setOption(lineOption);
        },
        dataType: "json",
        type: "post"
    });

    // Radar chart: legend, indicator axes and series all come from the response.
    var myChart2 = echarts.init(document.getElementById('main2'));
    $.ajax({
        /**********begin**********/
        url: "/Radar_Chart",
        /**********end**********/
        success: function (data) {
            var radarOption = {
                title: {text: '各平台各服务数量'},
                tooltip: {},
                legend: {
                    /**********begin**********/
                    // The controller publishes the platform names under "legendData";
                    // "taxiPlatform" is kept as a fallback for responses using that key.
                    data: data.legendData || data.taxiPlatform
                    /**********end**********/
                },
                radar: {
                    name: {
                        textStyle: {
                            color: '#fff',
                            backgroundColor: '#999',
                            borderRadius: 3,
                            padding: [3, 5]
                        }
                    },
                    /**********begin**********/
                    indicator: data.indicator
                    /**********end**********/
                },
                series: [{
                    type: 'radar',
                    /**********begin**********/
                    data: data.resultData
                    /**********end**********/
                }]
            };
            myChart2.setOption(radarOption);
        },
        dataType: "json",
        type: "post"
    });
</script>
</body>
</html>

C层:

MainController.java:

package net.educoder.app.controller;

import net.educoder.app.entity.Chart_Line;
import net.educoder.app.entity.Chart_Radar;
import net.educoder.app.mapper.MainMapper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Serves the chart page and the two JSON payloads it requests.
 */
@Controller
public class MainController {
    /**********begin**********/
    @Autowired
    MainMapper mainMapper;

    /** Returns the view name {@code "index"}. */
    @RequestMapping("/index")
    public String index() {
        return "index";
    }

    /**
     * Builds the line chart payload.
     *
     * @return a map with {@code timeList} (the values from
     *         {@code findTaxiTrendTime}) and {@code resultData} (one
     *         {@link Chart_Line} per taxi type, in the order returned by
     *         {@code findTaxiType})
     */
    @RequestMapping("/Line_Chart")
    @ResponseBody
    public Map<String, Object> Line_Chart() {
        List<String> taxiType = mainMapper.findTaxiType();
        Map<String, Object> map = new HashMap<>();

        // One series per taxi type.
        List<Chart_Line> resultList = new ArrayList<>();
        for (String type : taxiType) {
            List<Integer> numbers = mainMapper.findTaxiTrendNumByType(type);
            resultList.add(new Chart_Line(type, "line", numbers));
        }

        List<String> taxiTrendTimeList = mainMapper.findTaxiTrendTime();
        map.put("timeList", taxiTrendTimeList);
        map.put("resultData", resultList);
        return map;
    }

    /**
     * Builds the radar chart payload.
     *
     * @return a map with {@code indicator} (one {@code name}/{@code max} entry per
     *         service type), {@code resultData} (one {@link Chart_Radar} per
     *         platform) and the platform names under both {@code legendData} and
     *         {@code taxiPlatform}
     */
    @RequestMapping("/Radar_Chart")
    @ResponseBody
    public Map<String, Object> Radar_Chart() {
        Map<String, Object> map = new HashMap<>();

        // One radar axis per service type; every axis uses the fixed maximum 100.
        List<String> allTaxiService = mainMapper.findAllTaxiService();
        List<HashMap<String, Object>> indicatorList = new ArrayList<>();
        for (String service : allTaxiService) {
            HashMap<String, Object> indicator = new HashMap<>();
            indicator.put("name", service);
            indicator.put("max", 100);
            indicatorList.add(indicator);
        }

        // One radar series per platform.
        List<String> taxiPlatform = mainMapper.findTaxiPlatform();
        List<Chart_Radar> resultList = new ArrayList<>();
        for (String platform : taxiPlatform) {
            List<Integer> serviceNumByPlatform = mainMapper.findServiceNumByPlatform(platform);
            resultList.add(new Chart_Radar(platform, serviceNumByPlatform));
        }

        map.put("resultData", resultList);
        map.put("legendData", taxiPlatform);
        // index.html reads the legend from "taxiPlatform"; publish the same list
        // under that key as well so the radar legend is populated. "legendData"
        // is kept for any existing consumer.
        map.put("taxiPlatform", taxiPlatform);
        map.put("indicator", indicatorList);
        return map;
    }
    /**********end**********/
}

关于我们

最火推荐

小编推荐

联系我们


版权声明:本站内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 88@qq.com 举报,一经查实,本站将立刻删除。备案号:桂ICP备2021009421号
Powered By Z-BlogPHP.
复制成功
微信号:
我知道了