# Practice exercises: NumPy arrays, pandas DataFrames, and matplotlib/seaborn plots.
import numpy as np
# Row-wise (axis=1) summary statistics of a random 2x3 array of floats in [0, 1).
ARR1 = np.random.rand(2, 3)
print(ARR1)
print("MEAN:", ARR1.mean(axis=1))
print("STANDAR DEV:", ARR1.std(axis=1))
print("VARIANCE:", ARR1.var(axis=1))
#-----------------------------------------------------------------------------
import numpy as np
# Build a random integer matrix whose shape the user chooses, show its basic
# properties, then reshape it with the two dimensions swapped.
#
# SECURITY: the dimensions were previously read with eval(input(...)), which
# executes whatever expression the user types. They only ever need to be whole
# numbers, so they are parsed with int() instead (non-numeric input raises
# ValueError).
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))

# Integers drawn from [1, 100) in an a-by-b array.
Arr = np.random.randint(1, 100, (a, b))
print(Arr)
print(Arr.shape)
print(type(Arr))
print(Arr.dtype)

# (b, a) holds the same number of elements as (a, b), so the reshape is valid
# for any shape entered above. Note this is a reshape, not a transpose.
r = np.reshape(Arr, (b, a))
print("Array After reshape:\n", r)
#-----------------------------------------------------------------------------
import numpy as np
# Locate the zero, non-zero, and NaN entries of a small array.
a = np.array([0, 2, 3, 0, 4, 5, np.nan])
zero_mask = a == 0
nan_mask = np.isnan(a)
print(np.where(zero_mask))
# NaN compares unequal to 0, so its index is reported among the non-zero ones.
print(np.where(~zero_mask))
print(np.where(nan_mask))
#-----------------------------------------------------------------------------
import numpy as np
# Three independent random vectors of six integers, each drawn from [1, 10).
Array1, Array2, Array3 = (np.random.randint(1, 10, 6) for _ in range(3))
print("Array1 = ", Array1)
print("Array2 = ", Array2)
print("Array3 = ", Array3)

# Array4 is built from Array2 and Array3 only; Array5 is exactly twice Array1.
Array4 = np.subtract(Array2, Array3)
print("Array4 = ", Array4)
Array5 = 2 * Array1
print("Array5 = ", Array5)

# 2x2 covariance matrices of Array1 against each derived array.
for title, other in (
    ("Covariance of Array1 and Array4=\n", Array4),
    ("Covariance of Array1 and Array5=\n", Array5),
):
    print(title, np.cov(Array1, other))

# 2x2 correlation-coefficient matrices for the same pairs.
for title, other in (
    ("Corealation of Array1 and Array4=\n", Array4),
    ("Corealation of Array1 and Array5=\n", Array5),
):
    print(title, np.corrcoef(Array1, other))
#-----------------------------------------------------------------------------
import numpy as np
# Two random vectors of ten integers from [1, 10): add their first halves
# element-wise and multiply their second halves element-wise.
Array1 = np.random.randint(1, 10, 10)
Array2 = np.random.randint(1, 10, 10)
first_half_sum = Array1[:5] + Array2[:5]
print("Sum is:", first_half_sum)
second_half_product = Array1[5:] * Array2[5:]
print("Product is:", second_half_product)
#-----------------------------------------------------------------------------
#Question 3
import pandas as pd
import numpy as np
# Build a 50x3 frame of standard-normal values, blank out 10% of its cells,
# then run a series of clean-up and summary operations on the result.
a = pd.DataFrame(np.random.randn(50, 3), columns=['A', 'B', 'C'])
print(a)

# Number of cells to blank: 10% of all cells (rows * columns).
null_val = int(0.1 * a.size)
print(null_val)

# replace=False draws distinct flat positions. Sampling with replacement can
# pick the same cell more than once and leave fewer than null_val cells blank.
ind_null_val = np.random.choice(a.size, null_val, replace=False)

# Write the NaNs into an explicit copy and rebuild the frame. Assigning
# through a.values only works when that array is a writable view of the
# frame's data, which pandas does not guarantee.
cells = a.to_numpy(copy=True)
cells.flat[ind_null_val] = np.nan
a = pd.DataFrame(cells, index=a.index, columns=a.columns)
print(a)
#-----------------------------------------------------------------------------
# Keep only the columns that have at least 45 non-missing values.
col = a.dropna(thresh=45, axis=1)
print(col)
#-----------------------------------------------------------------------------
# Total count of missing cells over the whole frame.
print("No. of missing values:", a.isnull().sum().sum())
#-----------------------------------------------------------------------------
# Rows ordered by column A.
print(a.sort_values(by=['A']))
#-----------------------------------------------------------------------------
# Rows whose value in column A repeats an earlier row are removed.
print(a.drop_duplicates("A"))
#-----------------------------------------------------------------------------
# Both figures are for the first two columns (A and B). The second line is
# labelled as a correlation, so it uses .corr rather than a second covariance.
print("Covariance of Column1 and Column2=\n", a['A'].cov(a['B']))
print("Corelation of Column1 and Column2=\n", a['A'].corr(a['B']))
#-----------------------------------------------------------------------------
# Discretise column B into 5 equal-width bins.
print(pd.cut(a['B'], 5, precision=2))
#-----------------------------------------------------------------------------
#Question 7
# Family income records, one row per person. The key is "Family Name" (with a
# single space) because the group-by calls below look it up under that name.
data = {
    "Family Name": ['Shah', 'Vats', 'Vats', 'Kumar', 'Vats',
                    'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats'],
    "Gender": ['Male', 'Male', 'Female', 'Female', 'Female',
               'Male', 'Male', 'Female', 'Female', 'Male'],
    "Income": [44000, 65000, 43150, 66500, 255000,
               103000, 55000, 112400, 81030, 71900],
}
df = pd.DataFrame(data)
print(df)

# Total income per family.
print(df.groupby('Family Name')['Income'].sum())

# Highest and lowest income within each family.
print(df.groupby('Family Name')['Income'].agg(['max', 'min']))

# Members earning less than 80000.
print(df[df['Income'] < 80000])

# Average income of the female members (a scalar, not the filtered frame).
females = df[df['Gender'] == 'Female']
Avg_income = females['Income'].mean()

# Remove everyone whose income is below the overall average.
df2 = df.drop(df[df['Income'] < df['Income'].mean()].index)
print(df2)
# IRIS PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the iris sheet, draw three plots from it, then compute summary statistics.
a = pd.read_excel(r"C:\Users\HP\Downloads\iris.xlsx")

# Bar chart. DataFrame.plot opens a figure of its own unless it is handed an
# Axes, so draw on the 6x4 figure created here instead of leaving it blank.
# NOTE(review): the labels and title describe class-label frequencies, but
# this call plots every column of `a` as bars per row. A value_counts() on the
# class column may be what is intended — confirm the column name in the sheet.
plt.figure(figsize=(6, 4))
a.plot.bar(width=2, ax=plt.gca())
plt.xlabel('Class Label')
plt.ylabel('Frequency')
plt.title('Frequency of Each Class Label')
plt.show()

# d. Scatter plot for Petal Length vs Sepal Length with regression line
plt.figure(figsize=(8, 6))
sns.regplot(x='Sepal_length', y='Petal_length', data=a)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.title('Scatter plot: Petal Length vs Sepal Length with Regression Line')
plt.show()

# g. Heatmap for any two numeric attributes (e.g., Sepal Length and Petal Width)
numeric_attributes = ['Sepal_length', 'Petal_width']
sns.heatmap(a[numeric_attributes].corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Numeric Attributes')
plt.show()

# Summary statistics. None of these are printed here.
statistics = a.describe()
mode = a.mode().iloc[0]  # first row of the per-column modes
# numeric_only=True restricts these to the numeric columns. Without it, sem()
# and std() raise on any text column in recent pandas versions.
# (Presumably the sheet has a text class-label column — confirm.)
std_error = a.sem(numeric_only=True)
# 1.96 * std / sqrt(n): half-width of a normal-approximation 95% interval.
confidence_interval = 1.96 * (a.std(numeric_only=True) / (len(a) ** 0.5))
# TITANIC PRACTICAL
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Titanic sheet. `a` stays untouched; `df` is the cleaned working copy.
a = pd.read_excel(r"C:\Users\HP\Downloads\titanic.xlsx")
df = a.copy(deep=True)

# a. Clean the data by dropping the column with the largest number of missing values
missing_values = a.isnull().sum()
column_to_drop = missing_values.idxmax()
# Only drop when something is actually missing. If every count is zero,
# idxmax still returns a label and a complete column would be discarded.
if missing_values.max() > 0:
    df.drop(column_to_drop, axis=1, inplace=True)
print(df)

# f. Draw a scatter plot for passenger fare paid by Female and Male passengers separately
# Plotted from the original frame `a`. 'Sex' on the y-axis gives one row of
# points per category, and hue colours the points by the same column.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=a, x='Fare', y='Sex', hue='Sex')
plt.title('Scatter plot of Fare Paid by Gender')
plt.xlabel('Fare')
plt.ylabel('Gender')
plt.show()
# NOTE(review): `f4` and `b` are not defined anywhere in the visible part of
# this file, and `a` was last bound to the Titanic sheet. This looks like a
# fragment of a separate exercise — confirm where its inputs come from.
# The bare print("d") / print("a") / print("b") calls presumably label
# sub-question outputs — verify.

# Keep one row per distinct "name" and report how many rows remain.
f5=f4.drop_duplicates(["name"])
print(len(f5))
print("d")
# Re-index f4 by (name, Date) and print summary statistics of the result.
index=f4.set_index(['name','Date'])
print(index.describe())
print("a")
# Combine `a` and `b` on their shared "name" column.
c=pd.merge(a,b,on='name')
print(c)
print("b")
# Rows of f4 whose "name" does not appear in the merged result.
print(f4[~f4['name'].isin(c['name'])])