Introduction

La transposition d’un dataframe est une operation utile mais moins frequente que d’autres operations (agregats, jointures). On peut quelquefois oublier le nom des differents arguments, ou quelle fonction convertit au format long ou large. Les sections suivantes pourront vous servir d’aide-memoire, en sachant qu’il y a d’autres choix possibles de fonctions, en Python ou en R, pour realiser ces transpositions (pivot.table avec pandas, cast et melt avec data.table, …) .

library("dplyr")
library("tidyr")
tips = reshape2::tips %>% group_by(sex, smoker, day, time, size) %>% 
  summarise(total_bill = sum(total_bill),
            tip = sum(tip)) %>% ungroup()
head(tips)
# A tibble: 6 x 7
  sex    smoker day   time    size total_bill   tip
  <fct>  <fct>  <fct> <fct>  <int>      <dbl> <dbl>
1 Female No     Fri   Dinner     2      22.8   3.25
2 Female No     Fri   Lunch      3      16.0   3   
3 Female No     Sat   Dinner     1       7.25  1   
4 Female No     Sat   Dinner     2     149.   21.2 
5 Female No     Sat   Dinner     3      69.8  10.7 
6 Female No     Sat   Dinner     4      20.7   2.45
import pandas as pd
import numpy as np
import seaborn as sns

tips = sns.load_dataset("tips")
for colonne in ['sex', 'smoker', 'day', 'time']:
  tips[colonne] = tips[colonne].astype(object)

tips = tips.groupby(['sex', 'smoker', 'day', 'time', 'size']
  )[['total_bill', 'tip']].sum().reset_index()
tips.tail()
     sex smoker   day    time  size  total_bill    tip
51  Male    Yes   Sun  Dinner     4       57.82  10.18
52  Male    Yes   Sun  Dinner     5       30.46   2.00
53  Male    Yes  Thur   Lunch     2      152.47  22.58
54  Male    Yes  Thur   Lunch     3       18.71   4.00
55  Male    Yes  Thur   Lunch     4       20.53   4.00

Sous Python avec pandas

format long

# id_vars : identifiant des lignes du resultat, on garde ces colonnes inchangees
# value_vars : les colonnes a fusionner
# var_name: le nom de la colonne quali fusionnee
# value_name : le nom de la colonne quanti fusionnee
tips.head(30).melt(id_vars = ['sex', 'smoker', 'day', 'time', 'size'], 
  value_vars = ['total_bill', 'tip'], var_name = 'noms_col', value_name = 'valeurs_col')
       sex smoker   day    time  size    noms_col  valeurs_col
0   Female     No   Fri  Dinner     2  total_bill        22.75
1   Female     No   Fri   Lunch     3  total_bill        15.98
2   Female     No   Sat  Dinner     1  total_bill         7.25
3   Female     No   Sat  Dinner     2  total_bill       149.28
4   Female     No   Sat  Dinner     3  total_bill        69.83
5   Female     No   Sat  Dinner     4  total_bill        20.69
6   Female     No   Sun  Dinner     2  total_bill        72.81
7   Female     No   Sun  Dinner     3  total_bill        69.22
8   Female     No   Sun  Dinner     4  total_bill       119.66
9   Female     No   Sun  Dinner     5  total_bill        29.85
10  Female     No  Thur  Dinner     2  total_bill        18.78
11  Female     No  Thur   Lunch     1  total_bill        10.07
12  Female     No  Thur   Lunch     2  total_bill       237.11
13  Female     No  Thur   Lunch     3  total_bill        18.64
14  Female     No  Thur   Lunch     4  total_bill        58.91
15  Female     No  Thur   Lunch     6  total_bill        56.85
16  Female    Yes   Fri  Dinner     2  total_bill        48.80
17  Female    Yes   Fri   Lunch     2  total_bill        39.78
18  Female    Yes   Sat  Dinner     1  total_bill         3.07
19  Female    Yes   Sat  Dinner     2  total_bill       198.32
20  Female    Yes   Sat  Dinner     3  total_bill        72.47
21  Female    Yes   Sat  Dinner     4  total_bill        30.14
22  Female    Yes   Sun  Dinner     2  total_bill        27.11
23  Female    Yes   Sun  Dinner     3  total_bill        39.05
24  Female    Yes  Thur   Lunch     2  total_bill        74.95
25  Female    Yes  Thur   Lunch     3  total_bill        16.47
26  Female    Yes  Thur   Lunch     4  total_bill        43.11
27    Male     No   Fri  Dinner     2  total_bill        34.95
28    Male     No   Sat  Dinner     2  total_bill       251.38
29    Male     No   Sat  Dinner     3  total_bill       193.72
30  Female     No   Fri  Dinner     2         tip         3.25
31  Female     No   Fri   Lunch     3         tip         3.00
32  Female     No   Sat  Dinner     1         tip         1.00
33  Female     No   Sat  Dinner     2         tip        21.23
34  Female     No   Sat  Dinner     3         tip        10.74
35  Female     No   Sat  Dinner     4         tip         2.45
36  Female     No   Sun  Dinner     2         tip        12.74
37  Female     No   Sun  Dinner     3         tip        11.17
38  Female     No   Sun  Dinner     4         tip        17.56
39  Female     No   Sun  Dinner     5         tip         5.14
40  Female     No  Thur  Dinner     2         tip         3.00
41  Female     No  Thur   Lunch     1         tip         1.83
42  Female     No  Thur   Lunch     2         tip        38.01
43  Female     No  Thur   Lunch     3         tip         1.36
44  Female     No  Thur   Lunch     4         tip         8.09
45  Female     No  Thur   Lunch     6         tip         9.20
46  Female    Yes   Fri  Dinner     2         tip        10.80
47  Female    Yes   Fri   Lunch     2         tip         7.98
48  Female    Yes   Sat  Dinner     1         tip         1.00
49  Female    Yes   Sat  Dinner     2         tip        29.94
50  Female    Yes   Sat  Dinner     3         tip         9.00
51  Female    Yes   Sat  Dinner     4         tip         3.09
52  Female    Yes   Sun  Dinner     2         tip         7.00
53  Female    Yes   Sun  Dinner     3         tip         7.00
54  Female    Yes  Thur   Lunch     2         tip        12.70
55  Female    Yes  Thur   Lunch     3         tip         3.23
56  Female    Yes  Thur   Lunch     4         tip         5.00
57    Male     No   Fri  Dinner     2         tip         5.00
58    Male     No   Sat  Dinner     2         tip        42.85
59    Male     No   Sat  Dinner     3         tip        30.21

format large

# index : identifiant des lignes du resultat
# columns : le nom de la colonne quali a decouper en plusieurs colonnes
# values : le nom de la colonne des valeurs a rearranger
tips.head(30).pivot(index = ['sex', 'smoker', 'time', 'size'], 
  columns = 'day', values= 'total_bill').reset_index().fillna(0)
day     sex smoker    time  size    Fri     Sat     Sun    Thur
0    Female     No  Dinner     1   0.00    7.25    0.00    0.00
1    Female     No  Dinner     2  22.75  149.28   72.81   18.78
2    Female     No  Dinner     3   0.00   69.83   69.22    0.00
3    Female     No  Dinner     4   0.00   20.69  119.66    0.00
4    Female     No  Dinner     5   0.00    0.00   29.85    0.00
5    Female     No   Lunch     1   0.00    0.00    0.00   10.07
6    Female     No   Lunch     2   0.00    0.00    0.00  237.11
7    Female     No   Lunch     3  15.98    0.00    0.00   18.64
8    Female     No   Lunch     4   0.00    0.00    0.00   58.91
9    Female     No   Lunch     6   0.00    0.00    0.00   56.85
10   Female    Yes  Dinner     1   0.00    3.07    0.00    0.00
11   Female    Yes  Dinner     2  48.80  198.32   27.11    0.00
12   Female    Yes  Dinner     3   0.00   72.47   39.05    0.00
13   Female    Yes  Dinner     4   0.00   30.14    0.00    0.00
14   Female    Yes   Lunch     2  39.78    0.00    0.00   74.95
15   Female    Yes   Lunch     3   0.00    0.00    0.00   16.47
16   Female    Yes   Lunch     4   0.00    0.00    0.00   43.11
17     Male     No  Dinner     2  34.95  251.38    0.00    0.00
18     Male     No  Dinner     3   0.00  193.72    0.00    0.00

Sous R avec tidyr

format long

head(tips, 30) %>% pivot_longer(cols = c('total_bill', 'tip'), names_to = 'noms_col', 
                                values_to = 'valeurs_col') %>% as.data.frame()
      sex smoker  day   time size   noms_col valeurs_col
1  Female     No  Fri Dinner    2 total_bill       22.75
2  Female     No  Fri Dinner    2        tip        3.25
3  Female     No  Fri  Lunch    3 total_bill       15.98
4  Female     No  Fri  Lunch    3        tip        3.00
5  Female     No  Sat Dinner    1 total_bill        7.25
6  Female     No  Sat Dinner    1        tip        1.00
7  Female     No  Sat Dinner    2 total_bill      149.28
8  Female     No  Sat Dinner    2        tip       21.23
9  Female     No  Sat Dinner    3 total_bill       69.83
10 Female     No  Sat Dinner    3        tip       10.74
11 Female     No  Sat Dinner    4 total_bill       20.69
12 Female     No  Sat Dinner    4        tip        2.45
13 Female     No  Sun Dinner    2 total_bill       72.81
14 Female     No  Sun Dinner    2        tip       12.74
15 Female     No  Sun Dinner    3 total_bill       69.22
16 Female     No  Sun Dinner    3        tip       11.17
17 Female     No  Sun Dinner    4 total_bill      119.66
18 Female     No  Sun Dinner    4        tip       17.56
19 Female     No  Sun Dinner    5 total_bill       29.85
20 Female     No  Sun Dinner    5        tip        5.14
21 Female     No Thur Dinner    2 total_bill       18.78
22 Female     No Thur Dinner    2        tip        3.00
23 Female     No Thur  Lunch    1 total_bill       10.07
24 Female     No Thur  Lunch    1        tip        1.83
25 Female     No Thur  Lunch    2 total_bill      237.11
26 Female     No Thur  Lunch    2        tip       38.01
27 Female     No Thur  Lunch    3 total_bill       18.64
28 Female     No Thur  Lunch    3        tip        1.36
29 Female     No Thur  Lunch    4 total_bill       58.91
30 Female     No Thur  Lunch    4        tip        8.09
31 Female     No Thur  Lunch    6 total_bill       56.85
32 Female     No Thur  Lunch    6        tip        9.20
33 Female    Yes  Fri Dinner    2 total_bill       48.80
34 Female    Yes  Fri Dinner    2        tip       10.80
35 Female    Yes  Fri  Lunch    2 total_bill       39.78
36 Female    Yes  Fri  Lunch    2        tip        7.98
37 Female    Yes  Sat Dinner    1 total_bill        3.07
38 Female    Yes  Sat Dinner    1        tip        1.00
39 Female    Yes  Sat Dinner    2 total_bill      198.32
40 Female    Yes  Sat Dinner    2        tip       29.94
41 Female    Yes  Sat Dinner    3 total_bill       72.47
42 Female    Yes  Sat Dinner    3        tip        9.00
43 Female    Yes  Sat Dinner    4 total_bill       30.14
44 Female    Yes  Sat Dinner    4        tip        3.09
45 Female    Yes  Sun Dinner    2 total_bill       27.11
46 Female    Yes  Sun Dinner    2        tip        7.00
47 Female    Yes  Sun Dinner    3 total_bill       39.05
48 Female    Yes  Sun Dinner    3        tip        7.00
49 Female    Yes Thur  Lunch    2 total_bill       74.95
50 Female    Yes Thur  Lunch    2        tip       12.70
51 Female    Yes Thur  Lunch    3 total_bill       16.47
52 Female    Yes Thur  Lunch    3        tip        3.23
53 Female    Yes Thur  Lunch    4 total_bill       43.11
54 Female    Yes Thur  Lunch    4        tip        5.00
55   Male     No  Fri Dinner    2 total_bill       34.95
56   Male     No  Fri Dinner    2        tip        5.00
57   Male     No  Sat Dinner    2 total_bill      251.38
58   Male     No  Sat Dinner    2        tip       42.85
59   Male     No  Sat Dinner    3 total_bill      193.72
60   Male     No  Sat Dinner    3        tip       30.21

format large

head(tips, 30) %>% pivot_wider(id_cols = c('sex', 'smoker', 'time', 'size'), 
                               names_from = 'day', values_from = 'total_bill', values_fill = 0)
# A tibble: 19 x 8
   sex    smoker time    size   Fri    Sat   Sun  Thur
   <fct>  <fct>  <fct>  <int> <dbl>  <dbl> <dbl> <dbl>
 1 Female No     Dinner     2  22.8 149.    72.8  18.8
 2 Female No     Lunch      3  16.0   0      0    18.6
 3 Female No     Dinner     1   0     7.25   0     0  
 4 Female No     Dinner     3   0    69.8   69.2   0  
 5 Female No     Dinner     4   0    20.7  120.    0  
 6 Female No     Dinner     5   0     0     29.8   0  
 7 Female No     Lunch      1   0     0      0    10.1
 8 Female No     Lunch      2   0     0      0   237. 
 9 Female No     Lunch      4   0     0      0    58.9
10 Female No     Lunch      6   0     0      0    56.8
11 Female Yes    Dinner     2  48.8 198.    27.1   0  
12 Female Yes    Lunch      2  39.8   0      0    75.0
13 Female Yes    Dinner     1   0     3.07   0     0  
14 Female Yes    Dinner     3   0    72.5   39.0   0  
15 Female Yes    Dinner     4   0    30.1    0     0  
16 Female Yes    Lunch      3   0     0      0    16.5
17 Female Yes    Lunch      4   0     0      0    43.1
18 Male   No     Dinner     2  35.0 251.     0     0  
19 Male   No     Dinner     3   0   194.     0     0  

retour au debut du document