Search code snippets, questions, articles...

pandas code snippets

Remove duplicate rows from DataFrame in Pandas
import pandas as pd

# create a dataframe
df = pd.DataFrame({
  "values": [1, 2, 1, 4, 2, 5],
  "alphabets": ["A", "B", "A", "C", "B", "D"],
  "numbers": [10, 20, 10, 40, 20, 50]
})
print(df)

# delete duplicate rows
df = df.drop_duplicates()

print(df)
Pandas - Get index values of a DataFrame as a List
import pandas as pd

# create a dataframe
df = pd.DataFrame({
  "id": [1, 2, 3, 4, 5],
  "name": ["Joy", "Rick", "Carol", "Dumpty", "Clark"],
  "designation": ["Programmer", "Manager", "Admin", "Accountant", "Designer"]
})

print(df)

# get all the indexes
indexes = df.index.values.tolist()

print("All indexes of DataFrame df are: ", end="")
print(indexes)
Pandas - Delete multiple rows from DataFrame using index list
import pandas as pd

# create a dataframe
df = pd.DataFrame({
  "id": [1, 2, 3, 4, 5],
  "name": ["Delhi", "New York", "Mumbai", "LA", "London"],
  "pincode": [100, 200, 300, 400, 500]
})

print(df)

# detete rows using index list
df.drop(df.index[[0,3,4]], inplace=True)

# print the dataframe
print(df)
Convert a Python Dictionary to Pandas DataFrame
import pandas as pd

# create a dictionary
user_info = {
  'name': ['Rick', 'John', 'Callumn', 'Manoj', 'Dustin'],
  'address': ['New York', 'LA', 'London', 'Delhi', 'Holland'],
  'designation': ['Programmer', 'Accountant', 'Admin', 'Tech Lead', 'Manager']
}

# create the dataframe from a dictionary
df = pd.DataFrame(user_info)

print(df)
Pandas - Change rows order of a DataFrame using index list
import pandas as pd

# create a dictionary
students = {
  "name": ["Tom", "John", "Rick", "Sneha", "Johnson"],
  "score": [90, 80, 98, 99, 82],
  "subjects": ["Math", "Physics", "Chemistry", "english", "Data Structure"]
}

# create the dataframe from dictionary
df = pd.DataFrame(students, index=['a', 'b', 'c', 'd', 'e'])
print(df)

#change rows orders using index list
df = df.reindex(['d', 'a', 'e', 'b', 'c'])
print(df)
Change column orders using column names list - Pandas Dataframe
import pandas as pd

data = [
  ['a_row', 'a1', 'a2', 'a3'],
  ['b_row', 'b1', 'b2', 'b3'],
  ['c_row', 'c1', 'c2', 'c3']
]

# create dataframe
df = pd.DataFrame(data, columns=['col_row', 'col_1', 'col_2', 'col_3'], index=['a', 'b', 'c'])
print(df)

# change column orders using column names list
df = df[['col_3', 'col_2', 'col_row', 'col_1']]
print(df)
Check if a column contains zero values only in Pandas DataFrame
import pandas as pd

employees = [
  ['Robin', 30, 0, 'India'],
  ['Rick', 35, 0, 'US'],
  ['Tony Stark', 24, 0, 'US'],
  ['Roney', 24, 0, 'Canada'],
  ['Sumit', 24, 0, 'India'],
  ['Parek Bisth', 24, 0, 'India']
]

# create dataframe
df = pd.DataFrame(employees, columns=['Name', 'Age', 'PeerCount', 'Country'])

print(df)

# check if PeerCount column contains 0 values only
if (df['PeerCount'] == 0).all():
  print('All values in PeerCount column are zero')
else:
  print('All values in PeerCount column are not zero')
[Pandas] Check if a column exists in a DataFrame
import pandas as pd

employees = [
  ['Robin', 30, 'SOS', 'India'],
  ['Rick', 28, 'PSA', 'US'],
  ['Tony Stark', 32, 'COS', 'US']
]

# create dataframe
df = pd.DataFrame(employees, columns=['Name', 'Age', 'Dept', 'Country'])

# check if column exist in the dataframe
if 'Age' in df.columns:
  print("Coumn exists in the DataFrame")
else:
  print("Column does not exist in the DataFrame")
Merge two or multiple DataFrames in Pandas
import pandas as pd

# DataFrame 1
df1 = pd.DataFrame({'id': ['1000', '1001', '1002', '1003'],
                    'company': ['Devsheet', 'SpaceX', 'Tesla', 'Open AI']})
  
# DataFrame 2
df2 = pd.DataFrame({'location': ['India', 'US', 'US', 'US'],
                    'product': ['Website', 'Rocket', 'Car', 'ML']})
  
result = pd.concat([df1, df2], axis=1)

print(result)
Sort a DataFrame by rows and columns in Pandas
import pandas as pd

data = {
  'score_1': [30, 40, 25, 19, 60, 15],
  'score_2': [3, 7, 9, 2, 5, 11]
}

# create dataframe
df = pd.DataFrame(data)

# sort DataFrame - ascending order
result = df.sort_values(by='score_1')

print(result)
[Pandas] Add new column to DataFrame based on existing column
import pandas as pd

employees = {
  "name": ["Richard", "Patrick", "Sam", "Vick"],
  "address": ["New York", "Atlanta", "Canada", "India"],
  "salary": [10000, 20000, 15000, 22000]
}

# create dataframe
df = pd.DataFrame(employees)

print(df)

# add new column - deduction that has 10% of salary
df["deduction"] = df["salary"] * 10 / 100

print(df)
Get the count of rows and columns of a Pandas DataFrame
import pandas as pd

data = {
  "subjects": ["Math", "Physics", "Chemistry", "English", "Hindi"],
  "scores": [90, 80, 87, 80, 70]
}

df = pd.DataFrame(data)

# Get rows and columns count
result = df.shape

print("Rows: ", result[0])
print("Columns: ", result[1])
Create DataFrame from Python List in Pandas
import pandas as pd

# create a List of Lists
data_list = [
  ["John", 30, "Math"],
  ["Tom", 50, "Physics"],
  ["Pettrick", 40, "Chemistry"],
  ["Travis", 35, "English"]
]

# create the dataframe from List
df = pd.DataFrame(data_list, columns=['name', 'score', 'subject'])
print(df)
Insert new column with default value in Pandas DataFrame
import pandas as pd

data = {
  'alphabets': ['a', 'b', 'c', 'd']
}

# Create dataframe
df = pd.DataFrame(data)

print(df)

# Add new column with default value 0
df['code'] = 0

print(df)
Add a new column with data to Pandas Dataframe
import pandas as pd

my_data = {
  "names": ["Shivam", "John", "Prateek", "Gaurav"],
  "profession": ["Engineer", "Carpenter", "Writer", "marketer"]
}

df = pd.DataFrame(my_data)

# Add new column with rows data
df['address'] = ['India', 'New York', 'India', 'India']

print(df)
Delete one or multiple columns from Pandas Dataframe
import pandas as pd

subjects = {
  "name": ["Math", "Physics", "Chemistry", "English", "Hindi"],
  "score": [98, 91, 78, 80, 76],
  "code": ["M01", "P02", "C04", "E02", "H08"]
}

# Create the dataframe
df = pd.DataFrame(subjects)

print(df)

# Delete a column
df = df.drop(columns=['name'])

print(df)
Get a column rows as a List in Pandas Dataframe
import pandas as pd

my_data = {
  "names": ["Rick", "John", "Carol", "Gibson"],
  "profession": ["Engineer", "Carpenter", "Writer", "marketer"]
}

df = pd.DataFrame(my_data)

print(df)

# This will get the list of all the rows in column names
result = df['names'].tolist()

print(result)
Replace NAN values in Pandas dataframe
df.fillna('', inplace=True)
Pandas - Remove duplicate items from list
import pandas as pd

source_list = [1, 1, 3, 2, 3, 1, 2, 4]

result = pd.unique(source_list).tolist()

print(result)
Pandas dataframe to dictionary conversion python
import pandas as pd

data = {
  'name': ['Rick', 'Carol', 'Carl', 'Negan'],
  'place': ['Alexendria', 'Kingdom', 'Alexendria', 'Saviours']
} 
df = pd.DataFrame.from_dict(data)
print(df)
#     name      place
# 0   Rick      Alexendria
# 1   Carol     Kingdom
# 2   Carl      Alexendria
# 3   Negan     Saviours

# Get dict from dataframe(Method 1)
dict1 = dict(df.values)
print(dict1)
# -> {'Rick': 'Alexendria', 'Carol': 'Kingdom', 'Carl': 'Alexendria', 'Negan': 'Saviours'}

# Get dict from dataframe(Method 2)
dict2 = df.to_dict('index')
print(dict2)
# -> {0: {'name': 'Rick', 'place': 'Alexendria'}, 1: {'name': 'Carol', 'place': 'Kingdom'}, 2: {'name': 'Carl', 'place': 'Alexendria'}, 3: {'name': 'Negan', 'place': 'Saviours'}}
Change the name of columns in a pandas dataframe
#By creating a copy - using df.rename() method
df = df.rename(columns={'col_1': 'changed_col_1', 'col_2': 'changed_col_2'})
print(df)

#Without creating copy - using df.rename() method with inplace=True
df.rename(columns={'col_1': 'changed_col_1', 'col_2': 'changed_col_2'}, inplace=True)
print(df)
Get column names from Pandas DataFrame as a python List
import pandas as pd

df = pd.DataFrame({
    'Name': ['John', 'Eric', 'Rick'],
    'Score': [200, 500, 100\]
})

df_columns = df.columns.values.tolist()
print(df_columns)
#prints - ['Name', 'Score']
Get a value from DataFrame row using index and column name in pandas
import pandas as pd

df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
                  index=[1, 2, 3], columns=['A', 'B', 'C'])

#index - 3, column_name - 'C'
value = df.at[3, 'C']
print(value) # prints - 30

#this can also written as
value = df.loc[3].at['C']
print( value ) # prints - 30
Loop through DataFrame rows in python pandas
import pandas as pd

df = pd.DataFrame({
    'column_1': ['John', 'Eric', 'Rick'],
    'column_2': [100, 110, 120]
})

for index, row in df.iterrows():
    print(row['column_1'], row['column_2'])

#prints
# John 100
# Eric 110
# Rick 120
Create pandas DataFrame and add columns and rows to it
import pandas as pd
  
#first method - create empty dataframe
df = pd.DataFrame()

# append columns and rows to this dataframe
df['username'] = ['john', 'neil', 'curtis']
df['status'] = ['active', 'disabled', 'active']
print(df)

#----------------------------------------------------------------------------------#

#second method - create dataframe with columns
df = pd.DataFrame(columns = ['username', 'status'])

# append rows to the abo
df = df.append({'username' : 'john', 'status' : 'active'}, ignore_index = True)
df = df.append({'username' : 'neil', 'status' : 'disabled'}, ignore_index = True)
df = df.append({'username' : 'curtis', 'status' : 'active'}, ignore_index = True)
print(df)

#----------------------------------------------------------------------------------#

#make data frame with rows having - NaN values at index - a, b and c
df = pd.DataFrame(columns = ['username', 'status'], index = ['a', 'b', 'c'])
print(df)
#  +---+----------+--------+
#  |   | username | status |
#  +---+----------+--------+
#  | a |      NaN |    NaN |
#  | b |      NaN |    NaN |
#  | c |      NaN |    NaN |
#  +---+----------+--------+

# add rows at already created indexes
df.loc['a'] = ['john', 'active']
df.loc['b'] = ['neil', 'disabled']
df.loc['c'] = ['curtis', 'active']
print(df)
Convert a dictionary to pandas DataFrame in python
import pandas as pd

my_dict = [{'Name': 'John', 'Score': 20}, {'Name': 'Mark', 'Score': 50}, {'Name': 'Rick', 'Score': 70}]

df = pd.DataFrame.from_dict(my_dict)
print(df)

#   +----+--------+---------+
#   |    |  Name  |  Score  |
#   +-------------+---------+
#   | 0  |  John  |     20  |
#   | 1  |  Mark  |     50  |
#   | 2  |  Rick  |     70  |
#   +----+--------+---------+
Convert pandas DataFrame to List of dictionaries python
import pandas as pd

df_fruits = pd.DataFrame()
df_fruits["fruit_name"] = ["Orange", "Banana", "Apple"]
df_fruits["color"] = ["orange", "yellow", "red"]

print(df_fruits.to_dict('records'))
# prints - [{'fruit_name': 'Orange', 'color': 'orange'}, {'fruit_name': 'Banana', 'color': 'yellow'}, {'fruit_name': 'Apple', 'color': 'red'}]
Convert pandas DataFrame to python collection - dictionary
import pandas as pd

df = pd.DataFrame()
df["Name"] = ["John", "Mark", "Rick"]
df["Score"] = [20, 50, 70]

print(df.to_dict())
# prints - {'Name': {0: 'John', 1: 'Mark', 2: 'Rick'}, 'Score': {0: 20, 1: 50, 2: 70}}

print(df.to_dict('split'))
# prints - {'index': [0, 1, 2], 'columns': ['Name', 'Score'], 'data': [['John', 20], ['Mark', 50], ['Rick', 70]]}

print(df.to_dict('records'))
# prints - [{'Name': 'John', 'Score': 20}, {'Name': 'Mark', 'Score': 50}, {'Name': 'Rick', 'Score': 70}]

print(df.to_dict('index'))
#prints - {0: {'Name': 'John', 'Score': 20}, 1: {'Name': 'Mark', 'Score': 50}, 2: {'Name': 'Rick', 'Score': 70}}
Install Pandas using pip Python
pip install pandas
remove outliers
def outliers(column):
    Q1, Q3 = np.percentile(column, [25, 75])
    IQR = Q3 - Q1
    lower_range = Q1 - (1.5 * IQR)
    upper_range = Q3 + (1.5 * IQR)
    return lower_range, upper_range

outliers(df['yearly_salary'])
Reorder dataframe columns using column names in pandas
dframe = dframe[['column 5','column 6',4,3,2,1]]