Aula 07 - SQLite: inserindo dados + dbplyr
Slide 02
library (RSQLite)
library (tidyverse)
Warning: package 'tidyverse' was built under R version 4.3.3
Warning: package 'ggplot2' was built under R version 4.3.3
Warning: package 'tidyr' was built under R version 4.3.3
Warning: package 'readr' was built under R version 4.3.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (! "discoCopy.db" %in% list.files ("." )){
file.copy ("disco.db" ,
"discoCopy.db" )
}
db <- dbConnect (SQLite (),
"discoCopy.db" )
Slide 04
[1] "albums" "artists" "customers" "employees"
[5] "genres" "invoice_items" "invoices" "media_types"
[9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
[13] "tracks"
dbExecute (db,
"CREATE TABLE instruments
(AlbumId INTEGER,
TrackId INTEGER,
ElectricGuitar INTEGER,
Singer INTEGER,
Trumpet INTEGER)" )
Slide 05
dbListFields (db, 'instruments' )
[1] "AlbumId" "TrackId" "ElectricGuitar" "Singer"
[5] "Trumpet"
dbExecute (db, "DROP TABLE instruments" )
[1] "albums" "artists" "customers" "employees"
[5] "genres" "invoice_items" "invoices" "media_types"
[9] "playlist_track" "playlists" "sqlite_sequence" "sqlite_stat1"
[13] "tracks"
Slide 07
sql = paste ("SELECT ArtistId FROM artists" ,
"WHERE Name = ?" )
query <- dbSendQuery (db, sql)
dbBind (query, list ("Gilberto Gil" ))
aId <- dbFetch (query)
dbClearResult (query)
# Segundo passo interno, não deve causar problema
sql = paste ('SELECT Title FROM albums' ,
'WHERE ArtistId =' , aId)
dbGetQuery (db, sql)
Title
1 As Canções de Eu Tu Eles
2 Quanta Gente Veio Ver (Live)
3 Quanta Gente Veio ver--Bônus De Carnaval
Slide 08 # Caso tenha excluído a tabela instruments, execute este chunk.
dbExecute (db,
"CREATE TABLE instruments
(AlbumId INTEGER,
TrackId INTEGER,
ElectricGuitar INTEGER,
Singer INTEGER,
Trumpet INTEGER)" )
dbListFields (db,'instruments' )
[1] "AlbumId" "TrackId" "ElectricGuitar" "Singer"
[5] "Trumpet"
sql = paste ('SELECT TrackId, Name FROM tracks' ,
'WHERE AlbumId = 85' )
dbGetQuery (db, sql)
TrackId Name
1 1073 Óia Eu Aqui De Novo
2 1074 Baião Da Penha
3 1075 Esperando Na Janela
4 1076 Juazeiro
5 1077 Último Pau-De-Arara
6 1078 Asa Branca
7 1079 Qui Nem Jiló
8 1080 Assum Preto
9 1081 Pau-De-Arara
10 1082 A Volta Da Asa Branca
11 1083 O Amor Daqui De Casa
12 1084 As Pegadas Do Amor
13 1085 Lamento Sertanejo
14 1086 Casinha Feliz
Slide 09
dbExecute (db,
"INSERT INTO instruments
VALUES ('85', '1075', 0, 1, 0),
('85', '1078', 0, 1, 0); " )
dbGetQuery (db, "SELECT * FROM instruments" )
AlbumId TrackId ElectricGuitar Singer Trumpet
1 85 1075 0 1 0
2 85 1078 0 1 0
Slide 10
dbWriteTable (db, "mtcars" , mtcars)
dbListTables (db)
[1] "albums" "artists" "customers" "employees"
[5] "genres" "instruments" "invoice_items" "invoices"
[9] "media_types" "mtcars" "playlist_track" "playlists"
[13] "sqlite_sequence" "sqlite_stat1" "tracks"
dbGetQuery (db, "SELECT * FROM mtcars" )
mpg cyl disp hp drat wt qsec vs am gear carb
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
11 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
12 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
13 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
14 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
15 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
18 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
21 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
22 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
23 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
24 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
25 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
26 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
27 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
28 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
30 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
32 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
Slide 11
theAvgCar <- mtcars %>%
summarise_all (function (x) round (mean (x), 2 ))
theAvgCar
mpg cyl disp hp drat wt qsec vs am gear carb
1 20.09 6.19 230.72 146.69 3.6 3.22 17.85 0.44 0.41 3.69 2.81
dbWriteTable (db, "mtcars" , theAvgCar, append = TRUE )
dbGetQuery (db, "SELECT * FROM mtcars" ) %>% tail (3 )
mpg cyl disp hp drat wt qsec vs am gear carb
31 15.00 8.00 301.00 335.00 3.54 3.57 14.60 0.00 1.00 5.00 8.00
32 21.40 4.00 121.00 109.00 4.11 2.78 18.60 1.00 1.00 4.00 2.00
33 20.09 6.19 230.72 146.69 3.60 3.22 17.85 0.44 0.41 3.69 2.81
Slide 12
dbWriteTable (db, "mtcars" , mtcars, overwrite = TRUE )
dbGetQuery (db, "SELECT * FROM mtcars" )
mpg cyl disp hp drat wt qsec vs am gear carb
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
11 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
12 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
13 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
14 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
15 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
18 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
21 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
22 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
23 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
24 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
25 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
26 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
27 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
28 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
30 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
32 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
Slide 13
res <- dbSendQuery (db,
"SELECT * FROM mtcars WHERE cyl = 4" )
while (! dbHasCompleted (res)){
chunk <- dbFetch (res, n = 5 )
print (nrow (chunk))
}
#Apenas como exemplo
dbClearResult (res)
Slide 14
if ("discoCopy.db" %in% list.files ("." )){
file.remove ("discoCopy.db" )
}
Slide 15
#install.packages("vroom")
library (vroom)
Warning: package 'vroom' was built under R version 4.3.3
Attaching package: 'vroom'
The following objects are masked from 'package:readr':
as.col_spec, col_character, col_date, col_datetime, col_double,
col_factor, col_guess, col_integer, col_logical, col_number,
col_skip, col_time, cols, cols_condense, cols_only, date_names,
date_names_lang, date_names_langs, default_locale, fwf_cols,
fwf_empty, fwf_positions, fwf_widths, locale, output_column,
problems, spec
airports <- read_csv ("airports.csv"
, col_types = "cccccdd" )
airlines <- read_csv ("airlines.csv"
, col_types = "cc" )
air <- dbConnect (SQLite (), dbname= "air.db" )
dbWriteTable (air, name = "airports"
, airports)
dbWriteTable (air, name = "airlines"
, airlines)
dbListTables (air)
[1] "airlines" "airports"
Slide 16
dbDisconnect (air)
if ("air.db" %in% list.files ("." )){
file.remove ("air.db" )
}
Slide 17
Attaching package: 'dbplyr'
The following objects are masked from 'package:dplyr':
ident, sql
db <- dbConnect (SQLite (), "disco.db" ) # original
tracks <- tbl (db, "tracks" ) # dplyr
tracks
# Source: table<tracks> [?? x 9]
# Database: sqlite 3.43.2 [\\smb\ra238935\WindowsDesktop\ME315\Desafios\disco.db]
TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes
<int> <chr> <int> <int> <int> <chr> <int> <int>
1 1 For Those A… 1 1 1 Angus Y… 343719 1.12e7
2 2 Balls to th… 2 2 1 <NA> 342562 5.51e6
3 3 Fast As a S… 3 2 1 F. Balt… 230619 3.99e6
4 4 Restless an… 3 2 1 F. Balt… 252051 4.33e6
5 5 Princess of… 3 2 1 Deaffy … 375418 6.29e6
6 6 Put The Fin… 1 1 1 Angus Y… 205662 6.71e6
7 7 Let's Get I… 1 1 1 Angus Y… 233926 7.64e6
8 8 Inject The … 1 1 1 Angus Y… 210834 6.85e6
9 9 Snowballed 1 1 1 Angus Y… 203102 6.60e6
10 10 Evil Walks 1 1 1 Angus Y… 263497 8.61e6
# ℹ more rows
# ℹ 1 more variable: UnitPrice <dbl>
Slide 18
meanTracks <- tracks %>%
group_by (AlbumId) %>%
summarise (AvLen = mean (Milliseconds, na.rm = TRUE ),
AvCost = mean (UnitPrice, na.rm = TRUE ))
meanTracks
# Source: SQL [?? x 3]
# Database: sqlite 3.43.2 [\\smb\ra238935\WindowsDesktop\ME315\Desafios\disco.db]
AlbumId AvLen AvCost
<int> <dbl> <dbl>
1 1 240042. 0.99
2 2 342562 0.99
3 3 286029. 0.99
4 4 306657. 0.99
5 5 294114. 0.99
6 6 265456. 0.99
7 7 270780. 0.99
8 8 207638. 0.99
9 9 333926. 0.99
10 10 280551. 0.99
# ℹ more rows
Slide 19
meanTracks %>% show_query ()
<SQL>
SELECT `AlbumId`, AVG(`Milliseconds`) AS `AvLen`, AVG(`UnitPrice`) AS `AvCost`
FROM `tracks`
GROUP BY `AlbumId`
Slide 20
mT <- meanTracks %>% collect ()
mT
# A tibble: 347 × 3
AlbumId AvLen AvCost
<int> <dbl> <dbl>
1 1 240042. 0.99
2 2 342562 0.99
3 3 286029. 0.99
4 4 306657. 0.99
5 5 294114. 0.99
6 6 265456. 0.99
7 7 270780. 0.99
8 8 207638. 0.99
9 9 333926. 0.99
10 10 280551. 0.99
# ℹ 337 more rows