By Salerno | October 6, 2022
1) Introduction
This a set of exercises that around the SQL Intermediate course that I’ve studied at DataCamp (which I really recommend!).
In fact, the databases used there was different that I’ve used here. In other to accomplish the goals of practice as you study, I studied a lot of new concepts of SQL language and I’ve tried to use in a different environment (RStudio) and a new dataset.
I hope you enjoy a lot as I do!
# 1) Important packages ----
library(DBI)
library(dplyr)
library(odbc)
# 2) Defining a PostgreSQL DS ----
ds_postgresql <- odbcListDataSources()[[1]][4]
# 3) Checking all drivers ----
drv_all <- sort(unique(odbcListDrivers()[[1]]))
# 4) Defining a PostgreSQL drive ----
drv_postgresql <- drv_all[9]
# 5) Connection ----
con <- dbConnect(odbc::odbc(),
dsn = ds_postgresql,
server = "localhost",
uid = "postgres",
database= "rstudio_test")
# 6) Read back the full table
dtab = dbGetQuery(con, 'select * from gapminder')
summary(dtab)
## country continent year lifeexp
## Length:1704 Length:1704 Min. :1952 Min. :23.60
## Class :character Class :character 1st Qu.:1966 1st Qu.:48.20
## Mode :character Mode :character Median :1980 Median :60.71
## Mean :1980 Mean :59.47
## 3rd Qu.:1993 3rd Qu.:70.85
## Max. :2007 Max. :82.60
## pop gdppercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
# 7) filter
dtab = dbGetQuery(con, 'select country from gapminder')
summary(dtab)
## country
## Length:1704
## Class :character
## Mode :character
head(dtab)
## country
## 1 Afghanistan
## 2 Afghanistan
## 3 Afghanistan
## 4 Afghanistan
## 5 Afghanistan
## 6 Afghanistan
# 8) WHERE clause with number
dtab = dbGetQuery(con, 'select country, lifeexp
from gapminder
where lifeexp > 75')
summary(dtab)
## country lifeexp
## Length:173 Min. :75.01
## Class :character 1st Qu.:76.15
## Mode :character Median :77.42
## Mean :77.68
## 3rd Qu.:78.82
## Max. :82.60
head(dtab)
## country lifeexp
## 1 Albania 75.651
## 2 Albania 76.423
## 3 Argentina 75.320
## 4 Australia 76.320
## 5 Australia 77.560
## 6 Australia 78.830
# 9) WHERE clause with text
dtab = dbGetQuery(con, "select country, lifeexp
from gapminder
where country = 'Argentina'")
summary(dtab)
## country lifeexp
## Length:12 Min. :62.48
## Class :character 1st Qu.:65.51
## Mode :character Median :69.21
## Mean :69.06
## 3rd Qu.:72.22
## Max. :75.32
head(dtab)
## country lifeexp
## 1 Argentina 62.485
## 2 Argentina 64.399
## 3 Argentina 65.142
## 4 Argentina 65.634
## 5 Argentina 67.065
## 6 Argentina 68.481
# 10) WHERE clause with AND
dtab = dbGetQuery(con, "select *
from gapminder
where country = 'Argentina' and lifeexp > 65 and pop > 25000000")
summary(dtab)
## country continent year lifeexp
## Length:7 Length:7 Min. :1977 Min. :68.48
## Class :character Class :character 1st Qu.:1984 1st Qu.:70.36
## Mode :character Mode :character Median :1992 Median :71.87
## Mean :1992 Mean :72.00
## 3rd Qu.:2000 3rd Qu.:73.81
## Max. :2007 Max. :75.32
## pop gdppercap
## Min. :26983828 Min. : 8798
## 1st Qu.:30481146 1st Qu.: 9069
## Median :33958947 Median : 9308
## Mean :33820225 Mean :10010
## 3rd Qu.:37267292 3rd Qu.:10523
## Max. :40301927 Max. :12779
head(dtab)
## country continent year lifeexp pop gdppercap
## 1 Argentina Americas 1977 68.481 26983828 10079.027
## 2 Argentina Americas 1982 69.942 29341374 8997.897
## 3 Argentina Americas 1987 70.774 31620918 9139.671
## 4 Argentina Americas 1992 71.868 33958947 9308.419
## 5 Argentina Americas 1997 73.275 36203463 10967.282
## 6 Argentina Americas 2002 74.340 38331121 8797.641
# 11) WHERE clause with OR
dtab = dbGetQuery(con, "select country, continent, year
from gapminder
where year = 1977 or year = 1982")
summary(dtab)
## country continent year
## Length:284 Length:284 Min. :1977
## Class :character Class :character 1st Qu.:1977
## Mode :character Mode :character Median :1980
## Mean :1980
## 3rd Qu.:1982
## Max. :1982
head(dtab)
## country continent year
## 1 Afghanistan Asia 1977
## 2 Afghanistan Asia 1982
## 3 Albania Europe 1977
## 4 Albania Europe 1982
## 5 Algeria Africa 1977
## 6 Algeria Africa 1982
# 12) WHERE clause with and using two attributes
dtab = dbGetQuery(con,
"select country, continent, pop, lifeexp
from gapminder
where (pop > 1000000 and pop < 1500000)
and (lifeexp > 65 and lifeexp < 71)")
summary(dtab)
## country continent pop lifeexp
## Length:12 Length:12 Min. :1039009 Min. :65.57
## Class :character Class :character 1st Qu.:1086304 1st Qu.:68.81
## Mode :character Mode :character Median :1127290 Median :69.40
## Mean :1145466 Mean :69.08
## 3rd Qu.:1158281 3rd Qu.:69.76
## Max. :1489518 Max. :70.74
head(dtab)
## country continent pop lifeexp
## 1 Kuwait Asia 1140357 69.343
## 2 Mauritius Africa 1042663 68.740
## 3 Mauritius Africa 1096202 69.745
## 4 Mauritius Africa 1149818 70.736
## 5 Slovenia Europe 1489518 65.570
## 6 Trinidad and Tobago Americas 1039009 68.300
# 13) WHERE clause and BETWEEN
dtab = dbGetQuery(con,
'select country, continent, year, pop
from gapminder
where year between 1990 and 1995')
summary(dtab)
## country continent year pop
## Length:142 Length:142 Min. :1992 Min. :1.259e+05
## Class :character Class :character 1st Qu.:1992 1st Qu.:3.606e+06
## Mode :character Mode :character Median :1992 Median :8.689e+06
## Mean :1992 Mean :3.599e+07
## 3rd Qu.:1992 3rd Qu.:2.271e+07
## Max. :1992 Max. :1.165e+09
head(dtab)
## country continent year pop
## 1 Afghanistan Asia 1992 16317921
## 2 Albania Europe 1992 3326498
## 3 Algeria Africa 1992 26298373
## 4 Angola Africa 1992 8735988
## 5 Argentina Americas 1992 33958947
## 6 Australia Oceania 1992 17481977
# 14) WHERE clause with LIKE
dtab = dbGetQuery(con,
"select *
from gapminder
where country like 'A%'" )
summary(dtab)
## country continent year lifeexp
## Length:84 Length:84 Min. :1952 Min. :28.80
## Class :character Class :character 1st Qu.:1966 1st Qu.:42.04
## Mode :character Mode :character Median :1980 Median :67.27
## Mean :1980 Mean :59.95
## 3rd Qu.:1993 3rd Qu.:72.04
## Max. :2007 Max. :81.23
## pop gdppercap
## Min. : 1282697 Min. : 635.3
## 1st Qu.: 7101494 1st Qu.: 2724.7
## Median :10830537 Median : 5380.7
## Mean :13774802 Mean : 8777.0
## 3rd Qu.:19562728 3rd Qu.:10954.1
## Max. :40301927 Max. :36126.5
head(dtab)
## country continent year lifeexp pop gdppercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
# 15) WHERE clause with LIKE
dtab = dbGetQuery(con,
"select country, pop
from gapminder
where country like 'A%'")
summary(dtab)
## country pop
## Length:84 Min. : 1282697
## Class :character 1st Qu.: 7101494
## Mode :character Median :10830537
## Mean :13774802
## 3rd Qu.:19562728
## Max. :40301927
head(dtab)
## country pop
## 1 Afghanistan 8425333
## 2 Afghanistan 9240934
## 3 Afghanistan 10267083
## 4 Afghanistan 11537966
## 5 Afghanistan 13079460
## 6 Afghanistan 14880372
# 16) WHERE clause with LIKE
dtab = dbGetQuery(con,
"select country, pop
from gapminder
where country like '%n'")
summary(dtab)
## country pop
## Length:192 Min. : 120447
## Class :character 1st Qu.: 1559128
## Mode :character Median : 8288454
## Mean : 23030281
## 3rd Qu.: 28308158
## Max. :169270617
head(dtab)
## country pop
## 1 Afghanistan 8425333
## 2 Afghanistan 9240934
## 3 Afghanistan 10267083
## 4 Afghanistan 11537966
## 5 Afghanistan 13079460
## 6 Afghanistan 14880372
# 17) WHERE clause with NOT LIKE
dtab = dbGetQuery(con,
"select country, pop
from gapminder
where country not like 'A%'")
summary(dtab)
## country pop
## Length:1620 Min. :6.001e+04
## Class :character 1st Qu.:2.682e+06
## Mode :character Median :6.671e+06
## Mean :3.042e+07
## 3rd Qu.:1.962e+07
## Max. :1.319e+09
head(dtab)
## country pop
## 1 Bahrain 120447
## 2 Bahrain 138655
## 3 Bahrain 171863
## 4 Bahrain 202182
## 5 Bahrain 230800
## 6 Bahrain 297410
# 18) WHERE clause with IN
dtab = dbGetQuery(con,
"select country, continent
from gapminder
where continent in ('Africa', 'Europe')")
summary(dtab)
## country continent
## Length:984 Length:984
## Class :character Class :character
## Mode :character Mode :character
head(dtab)
## country continent
## 1 Albania Europe
## 2 Albania Europe
## 3 Albania Europe
## 4 Albania Europe
## 5 Albania Europe
## 6 Albania Europe
# 19) Using arithmetic clauses
# numerical data only
# aggregate functions execute in a column, otherwise arithmetic functions calcultes through the rows (events)
dtab = dbGetQuery(con,
'SELECT AVG(pop)
FROM gapminder')
dtab
## avg
## 1 29601212
# 20) numerical data only
dtab = dbGetQuery(con,
'SELECT SUM(pop)
FROM gapminder')
dtab
## sum
## 1 50440465801
# 21) various data types
dtab = dbGetQuery(con,
'SELECT MIN(pop)
FROM gapminder')
dtab
## min
## 1 60011
# 22) various data types
dtab = dbGetQuery(con,
'SELECT MIN(country)
FROM gapminder')
dtab
## min
## 1 Afghanistan
# 23) various data types
dtab = dbGetQuery(con,
'SELECT MAX(country)
FROM gapminder')
dtab
## max
## 1 Zimbabwe
# 24) using a negative parameter in ROUND you will obtain a round number two decimals to the left
dtab = dbGetQuery(con,
"select ROUND(AVG(pop), -2)
from gapminder
where year = 1957")
dtab
## round
## 1 18763400
# 25) GROUP BY and ORDER BY
dtab = dbGetQuery(con,
"select year, continent, country, min(pop)
from gapminder
group by year, continent, country
order by year, min desc")
head(dtab)
## year continent country min
## 1 1952 Asia China 556263527
## 2 1952 Asia India 372000000
## 3 1952 Americas United States 157553000
## 4 1952 Asia Japan 86459025
## 5 1952 Asia Indonesia 82052000
## 6 1952 Europe Germany 69145952
# 26) GROUP BY, HAVING and ORDER BY
dtab = dbGetQuery(con,
"select country, avg(lifeexp) as life_exp_avg
from gapminder
group by country
having avg(lifeexp) > 70
order by life_exp_avg desc")
dtab
## country life_exp_avg
## 1 Iceland 76.51142
## 2 Sweden 76.17700
## 3 Norway 75.84300
## 4 Netherlands 75.64850
## 5 Switzerland 75.56508
## 6 Canada 74.90275
## 7 Japan 74.82692
## 8 Australia 74.66292
## 9 Denmark 74.37017
## 10 France 74.34892
## 11 Spain 74.20342
## 12 Italy 74.01383
## 13 New Zealand 73.98950
## 14 United Kingdom 73.92258
## 15 Greece 73.73317
## 16 Israel 73.64583
## 17 Belgium 73.64175
## 18 Hong Kong, China 73.49283
## 19 United States 73.47850
## 20 Germany 73.44442
## 21 Austria 73.10325
## 22 Ireland 73.01725
## 23 Finland 72.99192
## 24 Puerto Rico 72.73933
## 25 Slovenia 71.60075
## 26 Czech Republic 71.51050
## 27 Singapore 71.22025
## 28 Cuba 71.04508
## 29 Uruguay 70.78158
## 30 Slovak Republic 70.69608
## 31 Portugal 70.41983
## 32 Taiwan 70.33667
## 33 Montenegro 70.29917
## 34 Costa Rica 70.18142
## 35 Poland 70.17692
## 36 Croatia 70.05592
comments powered by Disqus