The following article provides a comparison between R/S-Plus and Python for basic data management and manipulation.
Load Data
R/S-Plus
# Read both CSV files into data frames. By default read.csv() treats the
# first line of each file as the column names.
mydata <- read.csv("c:/customer.csv")
mydata_lkup <- read.csv("c:/purchaseorder.csv")
Python
import csv

# Load each CSV file into a list of rows; every row is a list of strings.
# Unlike read.csv() in R, csv.reader does not treat a header line specially:
# if the file has one, it is kept as the first row of the list.
# The with-blocks close the files as soon as they have been read.
with open('c:/customer.csv', newline='') as cust_file:
    custDS = list(csv.reader(cust_file, delimiter=',', quotechar='"'))

with open('c:/purchaseorder.csv', newline='') as po_file:
    poDS = list(csv.reader(po_file, delimiter=',', quotechar='"'))
Selecting - All Fields
R/S-Plus
# Evaluating the data frame's name at the top level displays all of its rows and columns.
mydata
Python
# Print the whole list of customer rows.
print(custDS)
One Field
R/S-Plus
# Extract the single column named col1 from the data frame.
mydata$col1
Python
# Print the first field of every customer row (index 0, because Python lists
# are 0-based) -- assumes col1 in the R example is the first CSV column.
for x in custDS:
    print(x[0])
Subset
R/S-Plus
# Keep only the rows whose col2 equals 18.
subset(mydata, col2 == 18 )
Python
# Keep the rows whose second field (index 1; assumed to be col2 in the R
# example) is 18. csv.reader yields every field as a string, hence the
# comparison with '18' rather than the number 18.
[x for x in custDS if x[1] == '18']
Sorting
R/S-Plus
# Reorder the rows by ascending values of the first column.
mydata[order(mydata[, 1]), ]
Python
# Return a new list sorted by the first field (index 0), mirroring
# mydata[, 1] in the R example; custDS itself is not modified.
# Fields are strings, so the ordering is lexicographic, not numeric.
sorted(custDS, key=lambda customer: customer[0])
Join
R/S-Plus
# Join customers to purchase orders on mydata$col1 == mydata_lkup$col2.
# all = TRUE makes this a full outer join: unmatched rows from either side
# are kept and padded with NA.
# The key columns are passed as strings; a bare col1 would be looked up as a
# variable rather than used as a column name.
merge(mydata, mydata_lkup, by.x = "col1", by.y = "col2", all = TRUE)
Python
# Index the purchase orders by their first field (the join key). When a key
# occurs more than once, the last row wins.
poDt = {row[0]: row[1:4] for row in poDS}

# Keep only the customers that have a matching purchase order and append the
# order's fields 1-3 to a copy of the customer row, so custDS is left untouched
# for the snippets that follow.
dsOut = [x + poDt[x[0]] for x in custDS if x[0] in poDt]
for x in dsOut:
    print(x)
Sample
R/S-Plus
# Show the first 10 rows of the data frame.
head(mydata, n = 10)
Python
# Print up to the first 10 customer rows, like head(mydata, 10) in R.
# Slicing simply stops early when there are fewer rows, instead of raising
# IndexError as fixed indices would.
for row in custDS[:10]:
    print(row)
Aggregate Analysis
R/S-Plus
# One-way frequency table: number of purchase-order rows per value of col2.
xtabs(~ col2, data = mydata_lkup)
Python
# Count how many purchase-order rows carry each value of the field at index 1.
# Counting does not need sorted input, so the rows in poDS are used as loaded.
poCounts = {}
for row in poDS:
    poCounts[row[1]] = poCounts.get(row[1], 0) + 1
print(poCounts)
Unique
R/S-Plus
# List the distinct values found in column col2 of the purchase-order data.
unique(mydata_lkup$col2)