Desafio 07

Author

Nicole de Barros Silva

Aula 07 - SQLite: inserindo dados + dbplyr

Slide 02

library(RSQLite)
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.3.3
Warning: package 'ggplot2' was built under R version 4.3.3
Warning: package 'tidyr' was built under R version 4.3.3
Warning: package 'readr' was built under R version 4.3.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if(!"discoCopy.db" %in% list.files(".")){
file.copy("disco.db",
"discoCopy.db")
}
[1] TRUE
db <- dbConnect(SQLite(),
"discoCopy.db")

Slide 04

dbListTables(db)
 [1] "albums"          "artists"         "customers"       "employees"      
 [5] "genres"          "invoice_items"   "invoices"        "media_types"    
 [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
[13] "tracks"         
dbExecute(db,
"CREATE TABLE instruments
(AlbumId INTEGER,
TrackId INTEGER,
ElectricGuitar INTEGER,
Singer INTEGER,
Trumpet INTEGER)")
[1] 0

Slide 05

dbListFields(db, 'instruments')
[1] "AlbumId"        "TrackId"        "ElectricGuitar" "Singer"        
[5] "Trumpet"       
dbExecute(db, "DROP TABLE instruments")
[1] 0
dbListTables(db)
 [1] "albums"          "artists"         "customers"       "employees"      
 [5] "genres"          "invoice_items"   "invoices"        "media_types"    
 [9] "playlist_track"  "playlists"       "sqlite_sequence" "sqlite_stat1"   
[13] "tracks"         

Slide 07

sql = paste("SELECT ArtistId FROM artists",
"WHERE Name = ?")
query <- dbSendQuery(db, sql)
dbBind(query, list("Gilberto Gil"))
aId <- dbFetch(query)
dbClearResult(query)
# Segundo passo interno, não deve causar problema
sql = paste('SELECT Title FROM albums',
'WHERE ArtistId =', aId)
dbGetQuery(db, sql)
                                     Title
1                 As Canções de Eu Tu Eles
2             Quanta Gente Veio Ver (Live)
3 Quanta Gente Veio ver--Bônus De Carnaval

Slide 08 # Caso tenha excluído a tabela instruments, execute este chunk.

dbExecute(db,
"CREATE TABLE instruments
(AlbumId INTEGER,
TrackId INTEGER,
ElectricGuitar INTEGER,
Singer INTEGER,
Trumpet INTEGER)")
[1] 0
dbListFields(db,'instruments')
[1] "AlbumId"        "TrackId"        "ElectricGuitar" "Singer"        
[5] "Trumpet"       
sql = paste('SELECT TrackId, Name FROM tracks',
'WHERE AlbumId = 85')
dbGetQuery(db, sql)
   TrackId                  Name
1     1073   Óia Eu Aqui De Novo
2     1074        Baião Da Penha
3     1075   Esperando Na Janela
4     1076              Juazeiro
5     1077   Último Pau-De-Arara
6     1078            Asa Branca
7     1079          Qui Nem Jiló
8     1080           Assum Preto
9     1081          Pau-De-Arara
10    1082 A Volta Da Asa Branca
11    1083  O Amor Daqui De Casa
12    1084    As Pegadas Do Amor
13    1085     Lamento Sertanejo
14    1086         Casinha Feliz

Slide 09

dbExecute(db,
"INSERT INTO instruments
VALUES ('85', '1075', 0, 1, 0),
('85', '1078', 0, 1, 0); ")
[1] 2
dbGetQuery(db, "SELECT * FROM instruments")
  AlbumId TrackId ElectricGuitar Singer Trumpet
1      85    1075              0      1       0
2      85    1078              0      1       0

Slide 10

dbWriteTable(db, "mtcars", mtcars)
dbListTables(db)
 [1] "albums"          "artists"         "customers"       "employees"      
 [5] "genres"          "instruments"     "invoice_items"   "invoices"       
 [9] "media_types"     "mtcars"          "playlist_track"  "playlists"      
[13] "sqlite_sequence" "sqlite_stat1"    "tracks"         
dbGetQuery(db, "SELECT * FROM mtcars") 
    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

Slide 11

theAvgCar <- mtcars %>%
summarise_all(function(x) round(mean(x), 2))
theAvgCar
    mpg  cyl   disp     hp drat   wt  qsec   vs   am gear carb
1 20.09 6.19 230.72 146.69  3.6 3.22 17.85 0.44 0.41 3.69 2.81
dbWriteTable(db, "mtcars", theAvgCar, append = TRUE)
dbGetQuery(db, "SELECT * FROM mtcars") %>% tail(3)
     mpg  cyl   disp     hp drat   wt  qsec   vs   am gear carb
31 15.00 8.00 301.00 335.00 3.54 3.57 14.60 0.00 1.00 5.00 8.00
32 21.40 4.00 121.00 109.00 4.11 2.78 18.60 1.00 1.00 4.00 2.00
33 20.09 6.19 230.72 146.69 3.60 3.22 17.85 0.44 0.41 3.69 2.81

Slide 12

dbWriteTable(db, "mtcars", mtcars, overwrite = TRUE)
dbGetQuery(db, "SELECT * FROM mtcars")
    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

Slide 13

res <- dbSendQuery(db,
"SELECT * FROM mtcars WHERE cyl = 4")
while(!dbHasCompleted(res)){
chunk <- dbFetch(res, n = 5)
print(nrow(chunk))
}
[1] 5
[1] 5
[1] 1
#Apenas como exemplo
dbClearResult(res)

Slide 14

dbDisconnect(db)
if("discoCopy.db" %in% list.files(".")){
file.remove("discoCopy.db")
}
[1] TRUE

Slide 15

#install.packages("vroom")
library(vroom)
Warning: package 'vroom' was built under R version 4.3.3

Attaching package: 'vroom'
The following objects are masked from 'package:readr':

    as.col_spec, col_character, col_date, col_datetime, col_double,
    col_factor, col_guess, col_integer, col_logical, col_number,
    col_skip, col_time, cols, cols_condense, cols_only, date_names,
    date_names_lang, date_names_langs, default_locale, fwf_cols,
    fwf_empty, fwf_positions, fwf_widths, locale, output_column,
    problems, spec
airports <- read_csv("airports.csv"
, col_types = "cccccdd")
airlines <- read_csv("airlines.csv"
, col_types = "cc")
air <- dbConnect(SQLite(), dbname="air.db")
dbWriteTable(air, name = "airports"
, airports)
dbWriteTable(air, name = "airlines"
, airlines)
dbListTables(air)
[1] "airlines" "airports"

Slide 16

dbDisconnect(air)
if("air.db" %in% list.files(".")){
file.remove("air.db")
}
[1] TRUE

Slide 17

library(dbplyr)

Attaching package: 'dbplyr'
The following objects are masked from 'package:dplyr':

    ident, sql
db <- dbConnect(SQLite(), "disco.db") # original
tracks <- tbl(db, "tracks") # dplyr
tracks
# Source:   table<tracks> [?? x 9]
# Database: sqlite 3.43.2 [\\smb\ra238935\WindowsDesktop\ME315\Desafios\disco.db]
   TrackId Name         AlbumId MediaTypeId GenreId Composer Milliseconds  Bytes
     <int> <chr>          <int>       <int>   <int> <chr>           <int>  <int>
 1       1 For Those A…       1           1       1 Angus Y…       343719 1.12e7
 2       2 Balls to th…       2           2       1 <NA>           342562 5.51e6
 3       3 Fast As a S…       3           2       1 F. Balt…       230619 3.99e6
 4       4 Restless an…       3           2       1 F. Balt…       252051 4.33e6
 5       5 Princess of…       3           2       1 Deaffy …       375418 6.29e6
 6       6 Put The Fin…       1           1       1 Angus Y…       205662 6.71e6
 7       7 Let's Get I…       1           1       1 Angus Y…       233926 7.64e6
 8       8 Inject The …       1           1       1 Angus Y…       210834 6.85e6
 9       9 Snowballed         1           1       1 Angus Y…       203102 6.60e6
10      10 Evil Walks         1           1       1 Angus Y…       263497 8.61e6
# ℹ more rows
# ℹ 1 more variable: UnitPrice <dbl>

Slide 18

meanTracks <- tracks %>%
group_by(AlbumId) %>%
summarise(AvLen = mean(Milliseconds, na.rm = TRUE),
AvCost = mean(UnitPrice, na.rm = TRUE))
meanTracks
# Source:   SQL [?? x 3]
# Database: sqlite 3.43.2 [\\smb\ra238935\WindowsDesktop\ME315\Desafios\disco.db]
   AlbumId   AvLen AvCost
     <int>   <dbl>  <dbl>
 1       1 240042.   0.99
 2       2 342562    0.99
 3       3 286029.   0.99
 4       4 306657.   0.99
 5       5 294114.   0.99
 6       6 265456.   0.99
 7       7 270780.   0.99
 8       8 207638.   0.99
 9       9 333926.   0.99
10      10 280551.   0.99
# ℹ more rows

Slide 19

meanTracks %>% show_query()
<SQL>
SELECT `AlbumId`, AVG(`Milliseconds`) AS `AvLen`, AVG(`UnitPrice`) AS `AvCost`
FROM `tracks`
GROUP BY `AlbumId`

Slide 20

mT <- meanTracks %>% collect()
mT
# A tibble: 347 × 3
   AlbumId   AvLen AvCost
     <int>   <dbl>  <dbl>
 1       1 240042.   0.99
 2       2 342562    0.99
 3       3 286029.   0.99
 4       4 306657.   0.99
 5       5 294114.   0.99
 6       6 265456.   0.99
 7       7 270780.   0.99
 8       8 207638.   0.99
 9       9 333926.   0.99
10      10 280551.   0.99
# ℹ 337 more rows
dbDisconnect(db)