Introduction
La transposition d’un dataframe est une operation utile mais moins frequente que d’autres operations (agregats, jointures). On peut quelquefois oublier le nom des differents arguments, ou quelle fonction convertit au format long ou large. Les sections suivantes pourront vous servir d’aide-memoire, en sachant qu’il y a d’autres choix possibles de fonctions, en Python ou en R, pour realiser ces transpositions (pivot.table avec pandas, cast et melt avec data.table, …) .
library("dplyr")
library("tidyr")
= reshape2::tips %>% group_by(sex, smoker, day, time, size) %>%
tips summarise(total_bill = sum(total_bill),
tip = sum(tip)) %>% ungroup()
head(tips)
# A tibble: 6 x 7
sex smoker day time size total_bill tip
<fct> <fct> <fct> <fct> <int> <dbl> <dbl>
1 Female No Fri Dinner 2 22.8 3.25
2 Female No Fri Lunch 3 16.0 3
3 Female No Sat Dinner 1 7.25 1
4 Female No Sat Dinner 2 149. 21.2
5 Female No Sat Dinner 3 69.8 10.7
6 Female No Sat Dinner 4 20.7 2.45
import pandas as pd
import numpy as np
import seaborn as sns
= sns.load_dataset("tips")
tips for colonne in ['sex', 'smoker', 'day', 'time']:
= tips[colonne].astype(object)
tips[colonne]
= tips.groupby(['sex', 'smoker', 'day', 'time', 'size']
tips 'total_bill', 'tip']].sum().reset_index()
)[[ tips.tail()
sex smoker day time size total_bill tip
51 Male Yes Sun Dinner 4 57.82 10.18
52 Male Yes Sun Dinner 5 30.46 2.00
53 Male Yes Thur Lunch 2 152.47 22.58
54 Male Yes Thur Lunch 3 18.71 4.00
55 Male Yes Thur Lunch 4 20.53 4.00
Sous Python avec pandas
format long
# id_vars : identifiant des lignes du resultat, on garde ces colonnes inchangees
# value_vars : les colonnes a fusionner
# var_name: le nom de la colonne quali fusionnee
# value_name : le nom de la colonne quanti fusionnee
30).melt(id_vars = ['sex', 'smoker', 'day', 'time', 'size'],
tips.head(= ['total_bill', 'tip'], var_name = 'noms_col', value_name = 'valeurs_col') value_vars
sex smoker day time size noms_col valeurs_col
0 Female No Fri Dinner 2 total_bill 22.75
1 Female No Fri Lunch 3 total_bill 15.98
2 Female No Sat Dinner 1 total_bill 7.25
3 Female No Sat Dinner 2 total_bill 149.28
4 Female No Sat Dinner 3 total_bill 69.83
5 Female No Sat Dinner 4 total_bill 20.69
6 Female No Sun Dinner 2 total_bill 72.81
7 Female No Sun Dinner 3 total_bill 69.22
8 Female No Sun Dinner 4 total_bill 119.66
9 Female No Sun Dinner 5 total_bill 29.85
10 Female No Thur Dinner 2 total_bill 18.78
11 Female No Thur Lunch 1 total_bill 10.07
12 Female No Thur Lunch 2 total_bill 237.11
13 Female No Thur Lunch 3 total_bill 18.64
14 Female No Thur Lunch 4 total_bill 58.91
15 Female No Thur Lunch 6 total_bill 56.85
16 Female Yes Fri Dinner 2 total_bill 48.80
17 Female Yes Fri Lunch 2 total_bill 39.78
18 Female Yes Sat Dinner 1 total_bill 3.07
19 Female Yes Sat Dinner 2 total_bill 198.32
20 Female Yes Sat Dinner 3 total_bill 72.47
21 Female Yes Sat Dinner 4 total_bill 30.14
22 Female Yes Sun Dinner 2 total_bill 27.11
23 Female Yes Sun Dinner 3 total_bill 39.05
24 Female Yes Thur Lunch 2 total_bill 74.95
25 Female Yes Thur Lunch 3 total_bill 16.47
26 Female Yes Thur Lunch 4 total_bill 43.11
27 Male No Fri Dinner 2 total_bill 34.95
28 Male No Sat Dinner 2 total_bill 251.38
29 Male No Sat Dinner 3 total_bill 193.72
30 Female No Fri Dinner 2 tip 3.25
31 Female No Fri Lunch 3 tip 3.00
32 Female No Sat Dinner 1 tip 1.00
33 Female No Sat Dinner 2 tip 21.23
34 Female No Sat Dinner 3 tip 10.74
35 Female No Sat Dinner 4 tip 2.45
36 Female No Sun Dinner 2 tip 12.74
37 Female No Sun Dinner 3 tip 11.17
38 Female No Sun Dinner 4 tip 17.56
39 Female No Sun Dinner 5 tip 5.14
40 Female No Thur Dinner 2 tip 3.00
41 Female No Thur Lunch 1 tip 1.83
42 Female No Thur Lunch 2 tip 38.01
43 Female No Thur Lunch 3 tip 1.36
44 Female No Thur Lunch 4 tip 8.09
45 Female No Thur Lunch 6 tip 9.20
46 Female Yes Fri Dinner 2 tip 10.80
47 Female Yes Fri Lunch 2 tip 7.98
48 Female Yes Sat Dinner 1 tip 1.00
49 Female Yes Sat Dinner 2 tip 29.94
50 Female Yes Sat Dinner 3 tip 9.00
51 Female Yes Sat Dinner 4 tip 3.09
52 Female Yes Sun Dinner 2 tip 7.00
53 Female Yes Sun Dinner 3 tip 7.00
54 Female Yes Thur Lunch 2 tip 12.70
55 Female Yes Thur Lunch 3 tip 3.23
56 Female Yes Thur Lunch 4 tip 5.00
57 Male No Fri Dinner 2 tip 5.00
58 Male No Sat Dinner 2 tip 42.85
59 Male No Sat Dinner 3 tip 30.21
format large
# index : identifiant des lignes du resultat
# columns : le nom de la colonne quali a decouper en plusieurs colonnes
# values : le nom de la colonne des valeurs a rearranger
30).pivot(index = ['sex', 'smoker', 'time', 'size'],
tips.head(= 'day', values= 'total_bill').reset_index().fillna(0) columns
day sex smoker time size Fri Sat Sun Thur
0 Female No Dinner 1 0.00 7.25 0.00 0.00
1 Female No Dinner 2 22.75 149.28 72.81 18.78
2 Female No Dinner 3 0.00 69.83 69.22 0.00
3 Female No Dinner 4 0.00 20.69 119.66 0.00
4 Female No Dinner 5 0.00 0.00 29.85 0.00
5 Female No Lunch 1 0.00 0.00 0.00 10.07
6 Female No Lunch 2 0.00 0.00 0.00 237.11
7 Female No Lunch 3 15.98 0.00 0.00 18.64
8 Female No Lunch 4 0.00 0.00 0.00 58.91
9 Female No Lunch 6 0.00 0.00 0.00 56.85
10 Female Yes Dinner 1 0.00 3.07 0.00 0.00
11 Female Yes Dinner 2 48.80 198.32 27.11 0.00
12 Female Yes Dinner 3 0.00 72.47 39.05 0.00
13 Female Yes Dinner 4 0.00 30.14 0.00 0.00
14 Female Yes Lunch 2 39.78 0.00 0.00 74.95
15 Female Yes Lunch 3 0.00 0.00 0.00 16.47
16 Female Yes Lunch 4 0.00 0.00 0.00 43.11
17 Male No Dinner 2 34.95 251.38 0.00 0.00
18 Male No Dinner 3 0.00 193.72 0.00 0.00
Sous R avec tidyr
format long
head(tips, 30) %>% pivot_longer(cols = c('total_bill', 'tip'), names_to = 'noms_col',
values_to = 'valeurs_col') %>% as.data.frame()
sex smoker day time size noms_col valeurs_col
1 Female No Fri Dinner 2 total_bill 22.75
2 Female No Fri Dinner 2 tip 3.25
3 Female No Fri Lunch 3 total_bill 15.98
4 Female No Fri Lunch 3 tip 3.00
5 Female No Sat Dinner 1 total_bill 7.25
6 Female No Sat Dinner 1 tip 1.00
7 Female No Sat Dinner 2 total_bill 149.28
8 Female No Sat Dinner 2 tip 21.23
9 Female No Sat Dinner 3 total_bill 69.83
10 Female No Sat Dinner 3 tip 10.74
11 Female No Sat Dinner 4 total_bill 20.69
12 Female No Sat Dinner 4 tip 2.45
13 Female No Sun Dinner 2 total_bill 72.81
14 Female No Sun Dinner 2 tip 12.74
15 Female No Sun Dinner 3 total_bill 69.22
16 Female No Sun Dinner 3 tip 11.17
17 Female No Sun Dinner 4 total_bill 119.66
18 Female No Sun Dinner 4 tip 17.56
19 Female No Sun Dinner 5 total_bill 29.85
20 Female No Sun Dinner 5 tip 5.14
21 Female No Thur Dinner 2 total_bill 18.78
22 Female No Thur Dinner 2 tip 3.00
23 Female No Thur Lunch 1 total_bill 10.07
24 Female No Thur Lunch 1 tip 1.83
25 Female No Thur Lunch 2 total_bill 237.11
26 Female No Thur Lunch 2 tip 38.01
27 Female No Thur Lunch 3 total_bill 18.64
28 Female No Thur Lunch 3 tip 1.36
29 Female No Thur Lunch 4 total_bill 58.91
30 Female No Thur Lunch 4 tip 8.09
31 Female No Thur Lunch 6 total_bill 56.85
32 Female No Thur Lunch 6 tip 9.20
33 Female Yes Fri Dinner 2 total_bill 48.80
34 Female Yes Fri Dinner 2 tip 10.80
35 Female Yes Fri Lunch 2 total_bill 39.78
36 Female Yes Fri Lunch 2 tip 7.98
37 Female Yes Sat Dinner 1 total_bill 3.07
38 Female Yes Sat Dinner 1 tip 1.00
39 Female Yes Sat Dinner 2 total_bill 198.32
40 Female Yes Sat Dinner 2 tip 29.94
41 Female Yes Sat Dinner 3 total_bill 72.47
42 Female Yes Sat Dinner 3 tip 9.00
43 Female Yes Sat Dinner 4 total_bill 30.14
44 Female Yes Sat Dinner 4 tip 3.09
45 Female Yes Sun Dinner 2 total_bill 27.11
46 Female Yes Sun Dinner 2 tip 7.00
47 Female Yes Sun Dinner 3 total_bill 39.05
48 Female Yes Sun Dinner 3 tip 7.00
49 Female Yes Thur Lunch 2 total_bill 74.95
50 Female Yes Thur Lunch 2 tip 12.70
51 Female Yes Thur Lunch 3 total_bill 16.47
52 Female Yes Thur Lunch 3 tip 3.23
53 Female Yes Thur Lunch 4 total_bill 43.11
54 Female Yes Thur Lunch 4 tip 5.00
55 Male No Fri Dinner 2 total_bill 34.95
56 Male No Fri Dinner 2 tip 5.00
57 Male No Sat Dinner 2 total_bill 251.38
58 Male No Sat Dinner 2 tip 42.85
59 Male No Sat Dinner 3 total_bill 193.72
60 Male No Sat Dinner 3 tip 30.21
format large
head(tips, 30) %>% pivot_wider(id_cols = c('sex', 'smoker', 'time', 'size'),
names_from = 'day', values_from = 'total_bill', values_fill = 0)
# A tibble: 19 x 8
sex smoker time size Fri Sat Sun Thur
<fct> <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl>
1 Female No Dinner 2 22.8 149. 72.8 18.8
2 Female No Lunch 3 16.0 0 0 18.6
3 Female No Dinner 1 0 7.25 0 0
4 Female No Dinner 3 0 69.8 69.2 0
5 Female No Dinner 4 0 20.7 120. 0
6 Female No Dinner 5 0 0 29.8 0
7 Female No Lunch 1 0 0 0 10.1
8 Female No Lunch 2 0 0 0 237.
9 Female No Lunch 4 0 0 0 58.9
10 Female No Lunch 6 0 0 0 56.8
11 Female Yes Dinner 2 48.8 198. 27.1 0
12 Female Yes Lunch 2 39.8 0 0 75.0
13 Female Yes Dinner 1 0 3.07 0 0
14 Female Yes Dinner 3 0 72.5 39.0 0
15 Female Yes Dinner 4 0 30.1 0 0
16 Female Yes Lunch 3 0 0 0 16.5
17 Female Yes Lunch 4 0 0 0 43.1
18 Male No Dinner 2 35.0 251. 0 0
19 Male No Dinner 3 0 194. 0 0