SELECT
COUNT(*) pv
FROM
(
SELECT
cookieid,
userid,
to_date(DATETIME) day1
FROM
ods.tracklog_5min
WHERE
DAY>='20151001'
AND DAY<='20151031'
AND lower(requesturl) IN ('http://chat.hexun.com/',
'http://zhibo.hexun.com/'))t1
INNER JOIN
(
SELECT
cookieid,
to_date(DATETIME) day2
FROM
ods.tracklog_5min
WHERE
DAY>='20151001'
AND DAY<='20151031'
AND ((
lower(requesturl) LIKE 'http://zhibo.hexun.com/%'
OR lower(requesturl) LIKE 'http://chat.hexun.com/%')
AND requesturl LIKE '%/default.html%'))t2
ON
t1.cookieid=t2.cookieid
AND t1.day1=t2.day2
INNER JOIN
(
SELECT
cookieid,
to_date(DATETIME) day3
FROM
ods.tracklog_5min
WHERE
DAY>='20151001'
AND DAY<='20151031'
AND ( (
lower(requesturl) LIKE 'http://px.hexun.com/%'
AND lower(requesturl) LIKE '%/default.html%' )
OR (
lower(requesturl) LIKE 'http://px.hexun.com/pack/%'
AND lower(requesturl) LIKE '%.html%' )
OR (
lower(requesturl) LIKE 'http://px.hexun.com/p/%'
AND lower(requesturl) LIKE '%.html%' ) ))t3
ON
t1.cookieid=t3.cookieid
AND t1.day1=t3.day3
LEFT JOIN
stage.saleplatform_productvisitdetail_temp t4
ON
t1.userid=t4.userid
WHERE
t4.createtime>t1.day1
OR t4.userid IS NULL;
可以看,上面的SQL针对同一源表的数据查询了三次,浪费了系统的资源,相同的源完全可以通用。
2、优化后的SQL
抽出公共数据
create table default.tracklog_10month as
select * from ods.tracklog_5min
WHERE DAY>='20151001' AND DAY<='20151031';
利用临时表,替换原SQL的公共部分:
SELECT
COUNT(*) pv
FROM
(
SELECT
cookieid,
userid,
to_date(DATETIME) day1
FROM
default.tracklog_10month
WHERE
lower(requesturl) IN ('http://chat.hexun.com/',
'http://zhibo.hexun.com/'))t1
INNER JOIN
(
SELECT
cookieid,
to_date(DATETIME) day2
FROM
default.tracklog_10month
WHERE (lower(requesturl) LIKE 'http://zhibo.hexun.com/%'
OR lower(requesturl) LIKE 'http://chat.hexun.com/%')
AND requesturl LIKE '%/default.html%')t2
ON
t1.cookieid=t2.cookieid
AND t1.day1=t2.day2
INNER JOIN
(
SELECT
cookieid,
to_date(DATETIME) day3
FROM
default.tracklog_10month
WHERE
( (
lower(requesturl) LIKE 'http://px.hexun.com/%'
AND lower(requesturl) LIKE '%/default.html%' )
OR (
lower(requesturl) LIKE 'http://px.hexun.com/pack/%'
AND lower(requesturl) LIKE '%.html%' )
OR (
lower(requesturl) LIKE 'http://px.hexun.com/p/%'
AND lower(requesturl) LIKE '%.html%' ) ))t3
ON
t1.cookieid=t3.cookieid
AND t1.day1=t3.day3
LEFT JOIN
stage.saleplatform_productvisitdetail_temp t4
ON
t1.userid=t4.userid
WHERE
t4.createtime>t1.day1
OR t4.userid IS NULL;