Contents

SQL queries in pandas- Basic

How to manipulate data with both SQL and pandas - Basics

Basic SQL

Data view

SQL pandas
1
2
select top 5 *
from df
1
df.head()

Filter

SQL pandas
1
2
3
select top 5 *
from df
where day = 'MON'
1
2
3
df[df.days=="MON"]
df.query("day=='MON'")
df.loc[df.day=="MON"]
1
2
3
select top 5 *
from df
where day in ('MON', 'TUE')
1
2
df.query("day in (['MON','TUE'])")
df[df.day.isin(["MON", "TUE"])]
1
2
3
select top 5 *
from df
where day like 'M%'
1
2
df.query("day.str.startswith('M)")
df[df.day.str.match(r'[Mm].')]
1
2
3
select top 5 *
from df
where num between 5 and 6
1
2
df[df.num.between(5,6)]
df.query("5<=num<=6")
1
2
3
select top 5 *
from df
where num is null
1
2
df[df.num.isna()]
df.query("num.isna()")
1
2
3
select top 5 *
from df
where num >5 and size <6
1
2
3
df[(df.num>5)&(df.size<6>)]
df.query("num>5 and size<6")
df.query("num>5 & size<6")

Sorting

SQL pandas
1
2
3
select *
from df
order by no
1
df.sort_values('no')
1
2
3
select *
from df
order by no desc, size
1
2
df.sort_values(['no', 'size'], ascending=[0,1])
df.sort_values(['no', 'size'], ascending=[False,True])

Aggregate

SQL pandas
1
2
3
select avg(no) as avg_no,
        stdev(no) as std_no
from df
1
2
df[["no"]].agg(["mean", "std"])
df.agg({'no':["mean", "std"]})

Aggregate by group

SQL pandas
1
2
3
4
select day,
       avg(no) as avg_no
from df
group by day
1
df.groupby("day")["no"].agg("mean")