Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a6375ba

Browse files
authored
Create Insights from Data with BigQuery: Challenge Lab
1 parent b486189 commit a6375ba

File tree

1 file changed

+327
-0
lines changed

1 file changed

+327
-0
lines changed
Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
Task 1:- Total confirmed cases (adjust the values in the query according to your lab instructions)
2+
3+
-- Worldwide total of cumulative confirmed cases for a single day.
-- Returns exactly one row with one column, total_cases_worldwide.
-- NOTE(review): every row for the date is summed; if the table stores rows at
-- several geographic levels the total may double count — confirm against the
-- lab's expected value.
SELECT
    SUM(cumulative_confirmed) AS total_cases_worldwide
FROM
    `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
    date = '2020-05-25'
6+
7+
8+
===========================================================================================================================================================================
9+
10+
Task 2:- Worst affected areas
11+
12+
13+
-- Counts the US states whose summed cumulative deaths on the given day
-- exceed 300. Rows without a state name are ignored.
SELECT
    COUNT(*) AS count_of_states
FROM (
    -- One row per state with its death total; HAVING keeps only the
    -- states above the threshold.
    SELECT
        subregion1_name AS state,
        SUM(cumulative_deceased) AS death_count
    FROM
        `bigquery-public-data.covid19_open_data.covid19_open_data`
    WHERE
        subregion1_name IS NOT NULL
        AND country_name = "United States of America"
        AND date = '2020-04-10'
    GROUP BY
        subregion1_name
    HAVING
        SUM(cumulative_deceased) > 300
)
29+
30+
31+
32+
===========================================================================================================================================================================
33+
34+
Task 3:-Identifying hotspots
35+
36+
37+
-- US states with more than 3000 summed cumulative confirmed cases on the
-- given day, highest count first.
-- Output columns: state, total_confirmed_cases.
SELECT
    subregion1_name AS state,
    SUM(cumulative_confirmed) AS total_confirmed_cases
FROM
    `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
    country_code = "US"
    AND date = '2020-04-10'
    AND subregion1_name IS NOT NULL
GROUP BY
    subregion1_name
-- Filter on the aggregate here rather than wrapping the query in SELECT *.
HAVING
    SUM(cumulative_confirmed) > 3000
-- The sort must sit on the outermost query: an ORDER BY inside a subquery
-- does not guarantee the order of the final result.
ORDER BY
    total_confirmed_cases DESC
50+
51+
52+
53+
===========================================================================================================================================================================
54+
55+
Task 4:- Fatality ratio
56+
57+
NOTE :- May has 31 days and April has 30, so adjust the end date of the range to match the month given in your lab.
58+
59+
60+
-- Case fatality ratio for Italy over the selected date range:
-- (summed deaths / summed confirmed cases) * 100.
WITH italy_totals AS (
    -- Both sums are taken over every Italy row whose date is in the
    -- inclusive range below.
    SELECT
        SUM(cumulative_confirmed) AS total_confirmed_cases,
        SUM(cumulative_deceased) AS total_deaths
    FROM
        `bigquery-public-data.covid19_open_data.covid19_open_data`
    WHERE
        country_name = "Italy"
        AND date BETWEEN '2020-05-01' AND '2020-05-31'
)

SELECT
    total_confirmed_cases,
    total_deaths,
    (total_deaths / total_confirmed_cases) * 100 AS case_fatality_ratio
FROM
    italy_totals
65+
66+
67+
68+
===========================================================================================================================================================================
69+
70+
Task 5:- Identifying specific day
71+
72+
73+
-- Earliest date on which an Italy row reports more than 16000 cumulative
-- deaths. Returns at most one row.
SELECT
    date
FROM
    `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
    cumulative_deceased > 16000
    AND country_name = "Italy"
ORDER BY
    date
LIMIT 1
82+
83+
84+
85+
86+
===========================================================================================================================================================================
87+
88+
89+
Task 6:- Finding days with zero net new cases
90+
91+
NOTE:- please check the start date and end date carefully
92+
93+
94+
-- Number of days in the range on which India's summed cumulative confirmed
-- cases did not change from the previous day in the range.
WITH india_cases_by_date AS (
    -- One row per date with the summed cumulative confirmed cases.
    SELECT
        date,
        SUM(cumulative_confirmed) AS cases
    FROM
        `bigquery-public-data.covid19_open_data.covid19_open_data`
    WHERE
        country_name = "India"
        AND date BETWEEN '2020-02-23' AND '2020-03-11'
    GROUP BY
        date
),

india_previous_day_comparison AS (
    -- previous_day is NULL for the first date in the range, so that row's
    -- net_new_cases is NULL and it never matches the = 0 filter below.
    SELECT
        date,
        cases,
        previous_day,
        cases - previous_day AS net_new_cases
    FROM (
        SELECT
            date,
            cases,
            LAG(cases) OVER (ORDER BY date) AS previous_day
        FROM
            india_cases_by_date
    )
)

SELECT
    COUNT(*)
FROM
    india_previous_day_comparison
WHERE
    net_new_cases = 0
143+
144+
145+
146+
147+
===========================================================================================================================================================================
148+
149+
Task 7:-
150+
151+
152+
-- Dates in the range on which US summed cumulative confirmed cases grew by
-- more than 5% over the previous day in the range, listed chronologically.
WITH us_cases_by_date AS (
    -- One row per date with the summed cumulative confirmed cases.
    SELECT
        date,
        SUM(cumulative_confirmed) AS cases
    FROM
        `bigquery-public-data.covid19_open_data.covid19_open_data`
    WHERE
        country_name = "United States of America"
        AND date BETWEEN '2020-03-22' AND '2020-04-20'
    GROUP BY
        date
),

us_previous_day_comparison AS (
    -- The first date in the range has no previous row, so previous_day and
    -- the derived columns are NULL there and the > 5 filter drops it.
    SELECT
        date,
        cases,
        LAG(cases) OVER (ORDER BY date) AS previous_day,
        cases - LAG(cases) OVER (ORDER BY date) AS net_new_cases,
        (cases - LAG(cases) OVER (ORDER BY date)) * 100
            / LAG(cases) OVER (ORDER BY date) AS percentage_increase
    FROM
        us_cases_by_date
)

SELECT
    Date,
    cases AS Confirmed_Cases_On_Day,
    previous_day AS Confirmed_Cases_Previous_Day,
    percentage_increase AS Percentage_Increase_In_Cases
FROM
    us_previous_day_comparison
WHERE
    percentage_increase > 5
-- Sort on the outermost query: an ORDER BY inside a CTE does not guarantee
-- the order of the final result.
ORDER BY
    Date
207+
208+
209+
210+
===========================================================================================================================================================================
211+
212+
Task 8:-
213+
214+
215+
-- Top 5 countries by recovery rate on the given day, considering only
-- countries with more than 50000 summed cumulative confirmed cases.
-- Output columns: country, confirmed_cases, recovered_cases, recovery_rate.
WITH cases_by_country AS (
    -- One row per country with summed confirmed and recovered counts.
    SELECT
        country_name AS country,
        SUM(cumulative_confirmed) AS cases,
        SUM(cumulative_recovered) AS recovered_cases
    FROM
        -- Backtick-quoted like every other query in this file.
        `bigquery-public-data.covid19_open_data.covid19_open_data`
    WHERE
        date = '2020-05-10'
    GROUP BY
        country_name
),

recovered_rate AS (
    -- recovery_rate is a percentage. This CTE is evaluated for every country,
    -- before the cases > 50000 filter below, so SAFE_DIVIDE returns NULL
    -- instead of raising a division-by-zero error when a total is 0.
    SELECT
        country,
        cases,
        recovered_cases,
        SAFE_DIVIDE(recovered_cases * 100, cases) AS recovery_rate
    FROM
        cases_by_country
)

SELECT
    country,
    cases AS confirmed_cases,
    recovered_cases,
    recovery_rate
FROM
    recovered_rate
WHERE
    cases > 50000
ORDER BY
    recovery_rate DESC
LIMIT 5
264+
265+
266+
267+
268+
===========================================================================================================================================================================
269+
270+
Task 9:- Change the two dates in the query to match your lab instructions
271+
272+
-- Cumulative daily growth rate (cdgr) of France's summed cumulative confirmed
-- cases between the two dates listed in the IN clause:
--   (last_day_cases / first_day_cases) ^ (1 / days_diff) - 1
WITH france_cases AS (
    -- At most two rows: one per listed date that has data.
    SELECT
        date,
        SUM(cumulative_confirmed) AS total_cases
    FROM
        `bigquery-public-data.covid19_open_data.covid19_open_data`
    WHERE
        country_name = "France"
        AND date IN ('2020-01-24', '2020-04-10')
    GROUP BY
        date
),

summary AS (
    -- Pairs the first date with the values of the following date via LEAD.
    SELECT
        total_cases AS first_day_cases,
        LEAD(total_cases) OVER (ORDER BY date) AS last_day_cases,
        DATE_DIFF(LEAD(date) OVER (ORDER BY date), date, day) AS days_diff
    FROM
        france_cases
    -- LIMIT 1 needs an explicit sort: without it either row may be returned,
    -- and the later date's row has NULL LEAD values, giving a NULL cdgr.
    ORDER BY
        date
    LIMIT 1
)

SELECT
    first_day_cases,
    last_day_cases,
    days_diff,
    POWER((last_day_cases / first_day_cases), (1 / days_diff)) - 1 AS cdgr
FROM
    summary
298+
299+
300+
301+
===========================================================================================================================================================================
302+
303+
Task 10:- Create a Looker Studio report
304+
305+
306+
-- Data source query for the Looker Studio report: per-date sums of US
-- cumulative confirmed cases and deaths within the inclusive date range.
SELECT
    date,
    SUM(cumulative_confirmed) AS country_cases,
    SUM(cumulative_deceased) AS country_deaths
FROM
    `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
    country_name = "United States of America"
    AND date BETWEEN '2020-03-20' AND '2020-04-23'
GROUP BY
    date
325+
326+
327+

0 commit comments

Comments
 (0)