3 R List & Python Dictionary

3.1 Basics

  • R list : CRAN Lists
    • An R list is an object consisting of an ordered collection of objects known as its ‘components’.
      • Each component can be of any type of R object and of any length. It can even include list, environment etc.
    • Components are always numbered. Components may also be named.
    • The vector of names is in fact simply an attribute of the list.
    • \([[ \ldots ]]\) vs. \([ \ldots ]\)
      • [[ ]] is the operator used to select a single element, whereas [ ] is a general subscripting operator.
      • Thus, for a list x, x[[1]] is the first object in the list, and if it is a named list the name is not included.
      • However, x[1] is a sublist of the list consisting of the first entry only. If it is a named list, the names are transferred to the sublist.
    • list is a vector but not an ‘atomic vector’
  • Python dict : Python Dictionary
    • dict : Literal {}, Ordered, Mutable, No Duplicates
    • Dictionaries compare equal if and only if they have the same (key, value) pairs (regardless of ordering).
    • ‘Ordered’ for a dict essentially means ‘insertion ordered’ (keys are kept in the order they were added), which is different from being ordered like OrderedDict
    • Equality comparisons with OrderedDict are order sensitive but this is not the case for dict
  • Create & Print

R

# Create an empty list and a simple named list, then show both the printed
# form and the compact structure summary.
aa <- list()                                      #Empty List
print(aa)
## list()

aa <- list(x = c(11L, 12L), y = c('a', 'b', 'c')) #Simple List
print(aa)
## $x
## [1] 11 12
## 
## $y
## [1] "a" "b" "c"

str(aa)                                           #Structure
## List of 2
##  $ x: int [1:2] 11 12
##  $ y: chr [1:3] "a" "b" "c"

Python

# An empty pair of braces is the dict literal (the output below reports class 'dict')
pp = {}                                           #Empty Dictionary
# The trailing '=' inside an f-string field echoes the expression text with its value
print(f'{type(pp) = } | {len(pp) = } | {pp = }')
## type(pp) = <class 'dict'> | len(pp) = 0 | pp = {}


# Values can be of different types: here a list under 'x' and a tuple under 'y'
pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}        #Simple Dictionary
print(f'{len(pp)=}, {pp = }')
## len(pp)=2, pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}

  • Comparing R lists with identical() is order sensitive. However, Python does not consider the position of keys when comparing dicts with ==

R

# Build the same named list twice: names given inline, and names attached
# afterwards to an unnamed list.
aa <- list(x = c(11L, 12L), y = c('a', 'b', 'c'))
bb <- list(c(11L, 12L), c('a', 'b', 'c'))
names(bb) <- c('x', 'y')

# Same components as aa, but in reverse order
dd <- list(y = c('a', 'b', 'c'), x = c(11L, 12L))

stopifnot(
  identical(aa, bb),
  !identical(aa, dd),                   #FALSE: R considers position
  #To ignore position (like Python), compare after sorting by name
  identical(aa[sort(names(aa))], dd[sort(names(dd))])
)

Python

# Build the same dict twice: as a literal, and from separate key / value sequences
pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}
qq = {key: val for key, val in zip(['x', 'y'], [[11, 12], ('a', 'b', 'c')])}

# Same pairs as pp, inserted in reverse order
ss = {'y': ('a', 'b', 'c'), 'x': [11, 12]}
assert pp == qq and qq == ss            #True: Python does not consider position
assert tuple(pp.items()) != tuple(ss.items())     #To consider position (like R)

  • Count Components

R

aa <- list(x = c(11L, 12L), y = c('a', 'b', 'c')) #Simple List
# length() counts the top-level components of the list
length(aa)                                        #Count components
## [1] 2

# lengths() reports the length of each component and keeps the component names
lengths(aa)                                       #Count elements of components
## x y 
## 2 3

Python

pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}        #Simple Dictionary
# len() on a dict counts its keys
len(pp)                                           #Count key: value pairs
## 2


# Map every key to the length of its value (counterpart of lengths() in R)
{k:len(v) for k, v in pp.items()}                 #Count length of each pair
## {'x': 2, 'y': 3}

  • Names and Types

R

aa <- list(x = c(11L, 12L), y = c('a', 'b', 'c')) #Simple List
# The component names are stored as the "names" attribute of the list
names(attributes(aa))                             #Attributes
## [1] "names"

bb <- names(aa)                                   #Names of components
bb
## [1] "x" "y"

# Find Names: three equivalent membership tests, for a present and an absent name
# is.element() is called positionally because base R declares it as
# is.element(el, table); there is no argument called 'set'.
stopifnot(all( 'x' %in% bb,  any('x' == bb),  is.element('x', bb),
              !'z' %in% bb, !any('z' == bb), !is.element('z', bb)))

# Note: %in% operator is NOT commutative i.e. a %in% b != b %in% a
# (the result has one element per element of the left-hand side)
stopifnot(! identical(1 %in% c(1, 2), c(1, 2) %in% 1))

sapply(aa, typeof)                                #Type of components
##           x           y 
##   "integer" "character"

Python

pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}        #Simple Dictionary
list(pp.keys())                                   #List of keys
## ['x', 'y']

# Membership on a dict tests its keys, so .keys() is not needed
assert 'x' in pp and 'z' not in pp                #Find Keys

list(pp.values())                                 #List of Values
## [[11, 12], ('a', 'b', 'c')]


list(pp.items())                                  #List of items
## [('x', [11, 12]), ('y', ('a', 'b', 'c'))]


# Use the value already bound by items() instead of looking it up again;
# __name__ is already a str, so no extra conversion is required
print('\n'.join([k + ' : ' + type(v).__name__
                for k, v in pp.items()]))         #Type of items
## x : list
## y : tuple

  • Slice or Subset or Select: Indexing starts from 1 in R and from 0 in Python.

R

aa <- list(x = c(11L, 12L), y = c('a', 'b', 'c')) #Simple List

# [[ ]] extracts the component itself; access by position, by $name and by
# [['name']] all return the same object
bb <- aa[[1]]                                     #[[ ]] First Object: index 1
stopifnot(identical(aa$x, bb), identical(aa[['x']], bb))
print(bb)
## [1] 11 12
str(bb)
##  int [1:2] 11 12

# [ ] keeps the container: the result is still a list, and the name is retained
dd <- aa[1]                                       #[ ] Sublist with First Entry
print(dd)
## $x
## [1] 11 12
str(dd)
## List of 1
##  $ x: int [1:2] 11 12

Python

pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}        #Simple Dictionary

# Positional access goes through a list of the (key, value) pairs,
# so the element obtained is a 2-tuple rather than the stored value
qq = list(pp.items())[0]                          #First Object: index 0
print(f'{type(qq) = } | {len(qq) = } | {qq = }')  #Converted to Tuple
## type(qq) = <class 'tuple'> | len(qq) = 2 | qq = ('x', [11, 12])


# Access by key returns the stored value itself
qq = pp['x']                                    
print(f'{type(qq) = } | {len(qq) = } | {qq = }')  #Keep original as list
## type(qq) = <class 'list'> | len(qq) = 2 | qq = [11, 12]

  • Type checking

R

aa <- list(x = c(11L, 12L), y = c('a', 'b', 'c')) #Simple List

# A list passes is.list() and is.vector(), but it is not an atomic vector
stopifnot(
  is.list(aa),
  is.vector(aa),                                  #TRUE: list is vector
  !is.atomic(aa)                                  #FALSE: list is not atomic
)

Python

pp = {'x': [11, 12], 'y': ('a', 'b', 'c')}        #Simple Dictionary

# Three ways to check the type, from least to most permissive
assert pp.__class__ is dict                       #same as type() (Avoid)
assert type(pp) is dict                           #type() (Strict)
assert isinstance(pp, dict)                       #isinstance() allows subtypes

# bool shows the difference between the subtype-aware and the strict check
assert isinstance(False, int)                     #True: bool is subtype of int
assert type(False) is not int

  • Merge or Combine in R allows duplicate components but Python overwrites them (Last seen wins)

R

aa <- list(a = 11L, b = 22L)
bb <- list(c = 33L, b = 222L)

# c() simply concatenates, so the shared name 'b' appears twice
dd <- c(aa, bb)                         #R keeps duplicates (Avoid)
str(dd)
## List of 4
##  $ a: int 11
##  $ b: int 22
##  $ c: int 33
##  $ b: int 222

# modifyList() replaces 'b' in place and appends the new name 'c'
ee <- modifyList(aa, val = bb)          #To overwrite duplicates (like Python)
str(ee)
## List of 3
##  $ a: int 11
##  $ b: int 222
##  $ c: int 33

#Overwrite duplicates: This may result in position change of key
# Keep only the last occurrence of every name in the concatenated list
str(dd[!duplicated(names(dd), fromLast = TRUE)])
## List of 3
##  $ a: int 11
##  $ c: int 33
##  $ b: int 222

Python

# Four ways to merge qq into pp; on the shared key 'b' the value from qq wins
pp = {'a': 11, 'b': 22}
qq = {'c': 33, 'b': 222}

ss = copy.deepcopy(pp)                  #Work on a copy so pp stays unchanged
ss.update(qq)                           #Update in-place

tt = {**pp, **qq}                       #Merge using **

uu = pp | qq                            #Merge using |

vv = copy.deepcopy(pp)
vv |= qq                                #Update in-place using |=
assert(ss == tt == uu == vv)

print(f'{len(ss)=}, {ss = }')
## len(ss)=3, ss = {'a': 11, 'b': 222, 'c': 33}

  • Add or Update Components

R

aa <- list('a' = 11L, 'b' = 22L)

aa$b <- 222L                                      #Update existing component via $
aa$b
## [1] 222

aa[['b']] <- 22L                                  #Update existing component via [[ ]]

aa$c <- 33L                                       #New name: component is added at the end
str(aa)
## List of 3
##  $ a: int 11
##  $ b: int 22
##  $ c: int 33

Python

pp = {'a': 11, 'b': 22}

# Assigning to an existing key updates it; assigning to a new key adds it
pp['b'] = 200                                     #subscript notation
assert pp['b'] == 200
pp['c'] = 33

# update() accepts a mapping ...
pp.update({'b': 300})                             #Slow
assert pp['b'] == 300

# ... or an iterable of (key, value) pairs
pp.update([('b', 22)])                            #Slow
assert pp['b'] == 22

print(f'{len(pp)=}, {pp = }')
## len(pp)=3, pp = {'a': 11, 'b': 22, 'c': 33}

  • Delete components

R

# Three ways to drop components; each one starts from the full list aa so
# that it really performs the deletion it demonstrates.
aa <- list('a' = 11L, 'b' = 22L, 'c' = 33L)

bb <- within(aa, rm('b', 'c'))                    #Warning if key not found

dd <- aa
dd[c('b', 'c', 'xyz')] <- NULL                    #No error if key not found

ee <- aa[setdiff(names(aa), c('b', 'c', 'xyz'))]  #No error if key not found
stopifnot(identical(bb, dd), identical(bb, ee))
str(bb)
## List of 1
##  $ a: int 11

Python

# Four ways to drop keys; each one works on its own copy of pp
pp = {'a': 11, 'b': 22, 'c': 33}
qq = copy.deepcopy(pp)
ss = copy.deepcopy(pp)
tt = copy.deepcopy(pp)

uu = {k: v for k, v in pp.items() if k not in ('b', 'c', 'xyz')} #Slow

for k in ('b', 'c'): del qq[k]          #Return None, Error if key not found

for k in ('b', 'c'): ss.pop(k)          #Return Value, Error if key not found
## 22
## 33
# A one-element tuple needs the trailing comma: ('xyz') is just the string
# 'xyz', which would be iterated character by character
for k in ('xyz',): ss.pop(k, None)      #Using None, No error if key not found

[tt.pop(k) for k in ('b', 'c')]
## [22, 33]
[tt.pop(k, None) for k in ('xyz',)]
## [None]


assert(qq == ss == tt == uu)
print(f'{len(qq)=}, {qq = }')
## len(qq)=1, qq = {'a': 11}

3.2 Nested

R

aa <- list('a' = 11L, 'b' = 22L)
# A component may itself be a list: aa is stored under the name 'e'
bb <- list('c' = 33L, 'd' = 44L, 'e' = aa)        #Nested List
# str() shows the inner list indented under its parent component
str(bb)
## List of 3
##  $ c: int 33
##  $ d: int 44
##  $ e:List of 2
##   ..$ a: int 11
##   ..$ b: int 22

Python

pp = {'a': 11, 'b': 22}
# A value may itself be a dict: pp is stored under the key 'e'
qq = {'c': 33, 'd': 44, 'e': pp}                  #Nested Dictionary
print(f'{len(qq)=}, {qq = }')
## len(qq)=3, qq = {'c': 33, 'd': 44, 'e': {'a': 11, 'b': 22}}

3.3 With Multiple Types

R

# Building blocks; each comment reads "R type | Python counterpart"
aa_lst <- list(11L, 22L)                #Unnamed List   | Tuple
bb_lst <- list(a = 11L, b = 22L)        #Named List     | Dict
dd_vec <- c(11L, 22L)                   #Unnamed Vector | List

# One list holding an integer, a string, a logical, NULL and the three
# containers defined above
ee <- list(a = 11L, b = 'x', c = FALSE, d = NULL, 
           e = aa_lst, f = dd_vec, g = bb_lst)
lst_r <- ee                             #R list to transform into Python dict
#str(ee)

Python

# Exclude Python sets because these are converted to R Environment
# Building blocks; each comment reads "Python type | R counterpart"
pp_tup = (11, 22)             #Tuple: Ordered, Immutable | Unnamed List
qq_lst = [11, 22]             #List: Ordered, Mutable    | Unnamed Vector
ss_dic = {'a': 11, 'b': 22}   #Dict: Ordered, Mutable    | Named List

# One dict holding an int, a str, a bool, None and the three containers above
tt = {'a': 11, 'b': 'x', 'c': False, 'd': None, 
      'e': pp_tup, 'f': qq_lst, 'g': ss_dic}
# NOTE(review): 'copy' is not imported in this snippet - presumably imported
# earlier in the document; confirm
dic_y = copy.deepcopy(tt)     #Python dict to transform into R list

# Disabled: would print "key : type : value" for every item of tt
#print('\n'.join([k + ' : ' + str(type(tt[k]).__name__) + ' : ' + str(v) 
#                for k, v in tt.items()]))

R

# NOTE(review): 'py' is not defined in this snippet - it looks like the
# reticulate handle to the Python session; confirm
aa <- py$dic_y                          #Convert Python dict to R List
# The converted dict is identical to the list lst_r built on the R side
stopifnot(identical(lst_r, aa))         #TRUE
# In the structure below the tuple (e) arrives as an unnamed list, the Python
# list (f) as an integer vector and the nested dict (g) as a named list
str(aa)
## List of 7
##  $ a: int 11
##  $ b: chr "x"
##  $ c: logi FALSE
##  $ d: NULL
##  $ e:List of 2
##   ..$ : int 11
##   ..$ : int 22
##  $ f: int [1:2] 11 22
##  $ g:List of 2
##   ..$ a: int 11
##   ..$ b: int 22

Python

# NOTE(review): 'r' and 'dic_y' are not defined in this snippet - 'r' looks like
# the reticulate handle to the R session; confirm
pp = r.lst_r                            #Convert R List to Python dict
# The tuple() call turns the converted 'e' value into a tuple so that pp can
# compare equal to dic_y
pp['e'] = tuple(pp['e'])                #List to Tuple
assert(dic_y == pp)                     #True

# One line per item: key : type name : value. Use the value bound by items()
# rather than a second lookup; __name__ is already a str
print('\n'.join([k + ' : ' + type(v).__name__ + ' : ' + str(v)
                for k, v in pp.items()]))
## a : int : 11
## b : str : x
## c : bool : False
## d : NoneType : None
## e : tuple : (11, 22)
## f : list : [11, 22]
## g : dict : {'a': 11, 'b': 22}