0% found this document useful (0 votes)
4 views

Print Print Print Print: Import As

This document contains code snippets demonstrating the use of various NumPy and Pandas functions for data analysis and manipulation. It includes examples of importing data, calculating statistics, reshaping arrays, merging and joining DataFrames, handling missing values, and visualizing data.

Uploaded by

Shiksha bamel
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views

Print Print Print Print: Import As

This document contains code snippets demonstrating the use of various NumPy and Pandas functions for data analysis and manipulation. It includes examples of importing data, calculating statistics, reshaping arrays, merging and joining DataFrames, handling missing values, and visualizing data.

Uploaded by

Shiksha bamel
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

#QUESTION 1

import numpy as np
# Row-wise summary statistics of a random 2x3 array.
# axis=1 collapses the columns, so each result has one value per row.
ARR1 = np.random.rand(2, 3)
print(ARR1)
print("MEAN:", ARR1.mean(axis=1))
print("STANDAR DEV:", ARR1.std(axis=1))
print("VARIANCE:", ARR1.var(axis=1))
#------------------------------------------------------------------------------
import numpy as np
# Build a random integer matrix of a user-chosen size, report its basic
# properties, then show it with the two dimensions exchanged.
# int() is used instead of eval(): eval would execute whatever text is typed
# at the prompt, while the program only ever needs a whole number here.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))
Arr = np.random.randint(1, 100, (a, b))  # integers drawn from [1, 100)
print(Arr)
print(Arr.shape)
print(type(Arr))
print(Arr.dtype)
# (b, a) holds the same a*b elements as (a, b), so the reshape always fits.
r = np.reshape(Arr, (b, a))
print("Array After reshape:\n", r)

#------------------------------------------------------------------------------
import numpy as np
# Report the positions of zero, non-zero and NaN entries of a sample vector.
# np.nonzero(mask) yields the same index tuple as the one-argument np.where.
a = np.array([0, 2, 3, 0, 4, 5, np.nan])
print(np.nonzero(a == 0))
print(np.nonzero(a != 0))
print(np.nonzero(np.isnan(a)))
#------------------------------------------------------------------------------

import numpy as np
# Three random integer vectors (values 1..9, six elements each).
Array1 = np.random.randint(1, 10, size=6)
Array2 = np.random.randint(1, 10, size=6)
Array3 = np.random.randint(1, 10, size=6)
print("Array1 = ", Array1)
print("Array2 = ", Array2)
print("Array3 = ", Array3)

# Array4 is the element-wise difference of Array2 and Array3.
Array4 = np.subtract(Array2, Array3)
print("Array4 = ", Array4)

# Array5 is Array1 doubled.
Array5 = np.multiply(Array1, 2)
print("Array5 = ", Array5)

# 2x2 covariance and correlation matrices of Array1 against each derived vector.
print("Covariance of Array1 and Array4=\n", np.cov(Array1, Array4))
print("Covariance of Array1 and Array5=\n", np.cov(Array1, Array5))
print("Corealation of Array1 and Array4=\n", np.corrcoef(Array1, Array4))
print("Corealation of Array1 and Array5=\n", np.corrcoef(Array1, Array5))
#------------------------------------------------------------------------------
import numpy as np
# Two random ten-element vectors: add their first halves element-wise and
# multiply their second halves element-wise.
Array1 = np.random.randint(1, 10, size=10)
Array2 = np.random.randint(1, 10, size=10)
print("Sum is:", Array1[:5] + Array2[:5])
print("Product is:", Array1[5:] * Array2[5:])
#------------------------------------------------------------------------------

# Swap two rows of a random integer matrix, then reverse one of its columns.
# A fixed 4x3 matrix that can be substituted for the random data:
# a = np.array([[4, 3, 1], [5, 7, 0], [9, 9, 3], [8, 2, 4]])

# int() is used instead of eval(): eval would execute whatever text is typed
# at the prompt, while only whole-number sizes and indices are needed here.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))
Arr = np.random.randint(1, 100, (a, b))
print(Arr)

c = int(input("Enter the rows1 to interchange:"))
d = int(input("Enter the row2 to interchange:"))
# Indexing with a list returns a copy, so the right-hand side is fully read
# before the two rows are overwritten.
Arr[[c, d], :] = Arr[[d, c], :]
print("Array After swapping")
print(Arr)

c1 = int(input("Column No. to flip"))
# Reverse the chosen column top-to-bottom, leaving the other columns as they are.
Arr[:, c1] = np.flip(Arr[:, c1])
print("Array After reversing column")
print(Arr)

#Question 3
import pandas as pd
import numpy as np
# Question 3: a 50x3 frame of standard-normal values in which 10% of the
# cells are replaced by NaN, followed by a series of clean-up / summary steps.
a = pd.DataFrame(np.random.randn(50, 3), columns=['A', 'B', 'C'])
print(a)
null_val = int(0.1 * a.size)
print(null_val)
# replace=False guarantees null_val *distinct* cells; sampling with
# replacement could pick the same cell twice and blank fewer than 10%.
ind_null_val = np.random.choice(a.size, null_val, replace=False)
# Blank the chosen cells through a boolean mask instead of writing into
# a.values, which only works when .values happens to be a writable view.
null_mask = np.zeros(a.size, dtype=bool)
null_mask[ind_null_val] = True
a = a.mask(null_mask.reshape(a.shape))
print(a)
#------------------------------------------------------------------------------
# Keep only the columns that still have at least 45 non-missing values.
col = a.dropna(thresh=45, axis=1)
print(col)
#------------------------------------------------------------------------------
print("No. of missing values:", a.isnull().sum().sum())
#------------------------------------------------------------------------------
print(a.sort_values(by=['A']))
#------------------------------------------------------------------------------
print(a.drop_duplicates("A"))
#------------------------------------------------------------------------------
print("Covariance of Column1 and Column2=\n", a['A'].cov(a['B']))
# The label announces the correlation of columns 1 and 2, so compute exactly
# that (the earlier code printed the covariance of B and C under this label).
print("Corelation of Column1 and Column2=\n", a['A'].corr(a['B']))
#------------------------------------------------------------------------------
# Discretise column B into 5 equal-width bins.
print(pd.cut(a['B'], 5, precision=2))
#------------------------------------------------------------------------------
#Question 7
# Household income records: one row per person.
data = {
    "Family Name": ['Shah', 'Vats', 'Vats', 'Kumar', 'Vats',
                    'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats'],
    "Gender": ['Male', 'Male', 'Female', 'Female', 'Female',
               'Male', 'Male', 'Female', 'Female', 'Male'],
    "Income": [44000, 65000, 43150, 66500, 255000,
               103000, 55000, 112400, 81030, 71900],
}
df = pd.DataFrame(data)
print(df)

# Total income per family, then the highest and lowest income per family.
print(df.groupby('Family Name')['Income'].sum())
print(df.groupby('Family Name')['Income'].agg(['max', 'min']))

# People earning less than 80000.
print(df[df['Income'] < 80000])

# Average income of the female members (previously the filtered frame was
# stored under this name and no average was ever computed or shown).
females = df[df['Gender'] == 'Female']
Avg_income = females['Income'].mean()
print("Average income of females:", Avg_income)

# Remove every row whose income is below the overall mean income.
df2 = df.drop(df[df['Income'] < df['Income'].mean()].index)
print(df2)

#IRIS PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the iris workbook from the local Downloads folder.
a = pd.read_excel(r"C:\Users\HP\Downloads\iris.xlsx")

# a. Display data types and info on the dataset
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would only add a stray "None" line after the report.
a.info()

# b. Find number of missing values in each column
missing_values = a.isnull().sum()
print("Missing Values:\n", missing_values)

# c. Plot bar chart for frequency of each class label
# NOTE(review): the name of the class-label column is not visible in this
# script; the first non-numeric column is assumed to hold it, falling back to
# the last column when every column is numeric - confirm against the workbook.
label_candidates = a.select_dtypes(exclude='number').columns
class_column = label_candidates[0] if len(label_candidates) else a.columns[-1]
# Count the rows per class; plotting the whole frame would instead draw one
# bar group per row of measurements.
class_frequency = a[class_column].value_counts()

plt.figure(figsize=(6, 4))
# Draw on the figure created above rather than letting pandas open a new one.
class_frequency.plot.bar(ax=plt.gca())
plt.xlabel('Class Label')
plt.ylabel('Frequency')
plt.title('Frequency of Each Class Label')
plt.show()

# d. Scatter plot for Petal Length vs Sepal Length with regression line
# Sepal length is on the x axis, petal length on the y axis.
fig, ax = plt.subplots(figsize=(8, 6))
sns.regplot(data=a, x='Sepal_length', y='Petal_length', ax=ax)
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Petal Length')
ax.set_title('Scatter plot: Petal Length vs Sepal Length with Regression Line')
plt.show()

# e. Density distribution for feature Petal Width
plt.figure(figsize=(8, 6))
# fill=True shades the area under the curve; it is the current spelling of
# the deprecated shade=True keyword.
sns.kdeplot(data=a['Petal_width'], fill=True)
plt.xlabel('Petal Width')
plt.ylabel('Density')
plt.title('Density Distribution of Petal Width')
plt.show()

# f. Pair plot for pairwise bivariate distribution
sns.pairplot(a)
plt.show()

# g. Heatmap for any two numeric attributes (e.g., Sepal Length and Petal Width)
# The heatmap shows the 2x2 correlation matrix of the two selected columns.
numeric_attributes = ['Sepal_length', 'Petal_width']
sns.heatmap(a[numeric_attributes].corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Numeric Attributes')
plt.show()

# h. Compute statistics for each numeric feature
# Restrict to numeric columns first: the printed labels promise per-numeric-
# feature values, and sem()/std() cannot be computed on a text column.
numeric_features = a.select_dtypes(include='number')

statistics = numeric_features.describe()
# mode() returns one row per tied mode; the first row is reported.
mode = numeric_features.mode().iloc[0]
std_error = numeric_features.sem()
# 1.96 * std / sqrt(n): half-width of a normal-approximation 95% interval
# around each column mean.
confidence_interval = 1.96 * (numeric_features.std() / (len(numeric_features) ** 0.5))

# Print computed statistics
print("Statistics for each numeric feature:\n", statistics)
print("\nMode for each numeric feature:\n", mode)
print("\nStandard Error for each numeric feature:\n", std_error)
print("\nConfidence Interval for each numeric feature:\n",
      confidence_interval)

# i. Compute correlation coefficients between each pair of features and plot heatmap
# Only numeric columns take part: a text column cannot be correlated.
correlation_matrix = a.select_dtypes(include='number').corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Iris Dataset')
plt.show()

#TITANIC PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the titanic workbook; `a` stays untouched, `df` is the cleaned copy.
a = pd.read_excel(r"C:\Users\HP\Downloads\titanic.xlsx")
df = a.copy(deep=True)
# a. Clean the data by dropping the column with the largest number of missing values
missing_values = a.isnull().sum()
# idxmax() gives the label of the column with the highest missing count.
column_to_drop = missing_values.idxmax()
df.drop(column_to_drop, axis=1, inplace=True)
print(df)

# b. Find the total number of passengers with age more than 30
# Rows with a missing Age compare as False and are therefore not counted.
passengers_over_30 = a.loc[a['Age'].gt(30)]
total_passengers_over_30 = passengers_over_30.shape[0]
print("No. of passengers over 30 :", total_passengers_over_30)

# c. Find the total fare paid by passengers of the second class
total_fare_second_class = a.loc[a['Pclass'].eq(2), 'Fare'].sum()
print("Total fare of second class : ", total_fare_second_class)

# d. Compare the number of survivors of each passenger class
# Summing the Survived column per class gives the per-class total.
survivors_per_class = a['Survived'].groupby(a['Pclass']).sum()
print("No. of Survivors of each class\n", survivors_per_class)

# e. Compute descriptive statistics for age attribute gender-wise
descriptive_stats_age_gender = a.groupby('Sex')['Age'].describe()
print("Descriptive statistics for age attribute gender wise\n",
      descriptive_stats_age_gender)

# f. Draw a scatter plot for passenger fare paid by Female and Male passengers separately
# Sex is used both as the y position and as the colour, so each gender's
# fares fall on their own horizontal band.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=a, x='Fare', y='Sex', hue='Sex')
plt.title('Scatter plot of Fare Paid by Gender')
plt.xlabel('Fare')
plt.ylabel('Gender')
plt.show()

# g. Compare density distribution for features age and passenger fare
# Both curves share one set of axes so their shapes can be compared directly.
plt.figure(figsize=(10, 6))
# fill=True shades the area under each curve; it is the current spelling of
# the deprecated shade=True keyword.
sns.kdeplot(data=a['Age'], label='Age', fill=True)
sns.kdeplot(data=a['Fare'], label='Fare', fill=True)
plt.title('Density Distribution of Age and Fare')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()

# h. Draw a pie chart for three passenger classes
class_counts = a['Pclass'].value_counts()
# value_counts() orders classes by frequency, so the labels are taken from
# its index instead of assuming a fixed 3, 1, 2 order.
class_labels = ['Class {}'.format(pclass) for pclass in class_counts.index]
plt.figure(figsize=(6, 6))
plt.pie(class_counts, labels=class_labels,
        autopct='%1.1f%%', colors=['skyblue', 'lightgreen', 'lightcoral'])
plt.title('Passenger Class Distribution')
plt.show()

# i. Find % of survived passengers for each class and analyze
# The mean of the Survived column per class, scaled by 100.
survived_per_class = a['Survived'].groupby(a['Pclass']).mean().mul(100)
print("% of survived passengers for each class\n", survived_per_class)
#question4
import pandas as pd
# Load the Day1 and Day2 workbooks and show each of them.
a = pd.read_excel(r"C:\Users\HP\Documents\Day1.xlsx")
print(a)
b = pd.read_excel(r"C:\Users\HP\Documents\Day2.xlsx")
print(b)

# Part c: stack both sheets and count the rows left after keeping one row
# per distinct name.
print("c")
f4 = pd.concat((a, b))
f5 = f4.drop_duplicates(subset="name")
print(f5.shape[0])

# Part d: index the stacked data by (name, Date) and summarise it.
print("d")
index = f4.set_index(keys=['name', 'Date'])
print(index.describe())

# Part a: rows whose name occurs in both sheets (merge on name).
print("a")
c = a.merge(b, on='name')
print(c)

# Part b: rows of the stacked data whose name is absent from the merge result.
print("b")
in_both = f4['name'].isin(c['name'])
print(f4.loc[~in_both])

You might also like