Python Data Processing

2026-01-19T00:00:00

python data-science pandas

Python Data Processing Example

This snippet demonstrates data processing using pandas and numpy.

PYTHON


1
2    import pandas as pd
3    import numpy as np
4    from sklearn.preprocessing import StandardScaler
5    
6    # Create sample data
7    data = {
8        'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
9        'age': [25, 30, 35, 28, 32],
10        'salary': [50000, 60000, 70000, 55000, 65000],
11        'department': ['IT', 'HR', 'Finance', 'IT', 'Marketing']
12    }
13    
14    # Create DataFrame
15    df = pd.DataFrame(data)
16    print("Original DataFrame:")
17    print(df)
18    
19    # Data preprocessing
20    # 1. Handle missing values
21    df.fillna({'salary': df['salary'].mean()}, inplace=True)
22    
23    # 2. Standardize numerical columns
24    scaler = StandardScaler()
25    numerical_cols = ['age', 'salary']
26    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])
27    
28    # 3. One-hot encode categorical columns
29    df_encoded = pd.get_dummies(df, columns=['department'])
30    
31    print("\nProcessed DataFrame:")
32    print(df_encoded)
33    
34    # 4. Group by department and calculate mean salary
35    dept_salary = df.groupby('department')['salary'].mean()
36    print("\nAverage salary by department:")
37    print(dept_salary)

Back to Snippets

Python Data Processing Example

This snippet demonstrates data processing using pandas and numpy.

PYTHON


1
2    import pandas as pd
3    import numpy as np
4    from sklearn.preprocessing import StandardScaler
5    
6    # Create sample data
7    data = {
8        'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
9        'age': [25, 30, 35, 28, 32],
10        'salary': [50000, 60000, 70000, 55000, 65000],
11        'department': ['IT', 'HR', 'Finance', 'IT', 'Marketing']
12    }
13    
14    # Create DataFrame
15    df = pd.DataFrame(data)
16    print("Original DataFrame:")
17    print(df)
18    
19    # Data preprocessing
20    # 1. Handle missing values
21    df.fillna({'salary': df['salary'].mean()}, inplace=True)
22    
23    # 2. Standardize numerical columns
24    scaler = StandardScaler()
25    numerical_cols = ['age', 'salary']
26    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])
27    
28    # 3. One-hot encode categorical columns
29    df_encoded = pd.get_dummies(df, columns=['department'])
30    
31    print("\nProcessed DataFrame:")
32    print(df_encoded)
33    
34    # 4. Group by department and calculate mean salary
35    dept_salary = df.groupby('department')['salary'].mean()
36    print("\nAverage salary by department:")
37    print(dept_salary)