"""pandas is a powerful data analysis and manipulation library for Python.

It provides two core data structures, Series (1D) and DataFrame (2D), which
are built on top of NumPy and allow for easy and efficient data handling.
"""
import pandas as pd
# Ten short pandas examples that run top to bottom as one script.
# Each example reuses ``df`` as the previous example left it, so the order of
# the statements matters.

# Example 1: Create a DataFrame
data = {'Name': ['Alice', 'Bob'], 'Age': [25, 30]}
df = pd.DataFrame(data)
print(df)

# Example 2: Read CSV
# 'data.csv' is not created anywhere in this script. When it is missing, keep
# the frame from Example 1 instead of aborting, so the remaining examples
# still run on a fresh checkout.
try:
    df = pd.read_csv('data.csv')
except FileNotFoundError:
    print("data.csv not found; continuing with the DataFrame from Example 1")
print(df.head())
# NOTE(review): the examples below assume the loaded file has 'Name' and
# 'Age' columns, like the frame in Example 1 -- confirm against the real file.

# Example 3: Select columns and rows
print(df['Name'])   # one column, returned as a Series
print(df.loc[0])    # the row whose index label is 0

# Example 4: Filtering data
is_older = df['Age'] > 26   # boolean mask, one entry per row
print(df[is_older])

# Example 5: Adding new column
df['Age+10'] = df['Age'] + 10
print(df)

# Example 6: Group by and aggregate
# numeric_only=True keeps the mean restricted to numeric columns; without it,
# pandas >= 2.0 raises TypeError as soon as the frame has a text column other
# than the grouping key.
print(df.groupby('Name').mean(numeric_only=True))

# Example 7: Handling missing values
# Build the column to match the current row count: the first row is missing,
# every other row is 'NY'. For the two-row sample this is [None, 'NY'].
row_count = len(df)
df['City'] = ([None] + ['NY'] * row_count)[:row_count]
print(df.fillna('Unknown'))   # returns a filled copy; df itself is unchanged

# Example 8: Sorting
print(df.sort_values(by='Age'))   # ascending by default, returns a new frame

# Example 9: Merging DataFrames
df2 = pd.DataFrame({'Name': ['Alice'], 'Score': [90]})
# Inner join (the default, spelled out here): only names present in both
# frames survive.
print(pd.merge(df, df2, on='Name', how='inner'))

# Example 10: Export to CSV
# index=False omits the row index from the written file.
df.to_csv('output.csv', index=False)