# Outline of the session topics, kept as one multi-line string.
Agenda = '''
1. Built in Exception
2. User Define Exception
3. Data Science Introduction
4. Numpy
a) Array
b) Matrix
5. pandas
a) Series
b) Dataframe
6. Matplotlib
a) Visualisation of Data
7. Data Science Case Study
8. Summary and Conclusion
'''
# int() raises ValueError when the typed text is not a valid integer; these
# two reads are not wrapped in try/except, so such an error is unhandled here.
data = int(input("Enter Some data:"))
data = int(input("Enter Some data:"))
# `hi` is not defined anywhere in the visible file, so evaluating it raises
# NameError -- presumably a deliberate built-in exception demo; confirm.
hi
# List the attributes available on the Exception class.
dir(Exception)
# Show the built-in documentation for Exception and for its base class.
help(Exception)
help(BaseException)
# Read one integer from the user; report invalid text instead of crashing.
try:
    data = int(input("Enter Some data:"))
except ValueError as bad_value:
    # int() could not parse what was typed.
    print("Enter Integer Value only::", bad_value)
else:
    # Conversion succeeded, so echo the value back.
    print("Your data is", data)
# Keep prompting until the user types something int() can parse.
while True:
    try:
        data = int(input("Enter Some data:"))
    except ValueError as bad_value:
        print("Enter Integer Value only::", bad_value)
        continue  # invalid input: ask again
    print("Your data is", data)
    break
%%writefile ShortInputModule.py
'''User-defined exception module.

Provides ShortInput, an exception to raise for input data whose length is
less than the required minimum (six characters in this tutorial).
'''


class ShortInput(Exception):  # Subclass of the built-in Exception class
    '''Raised when input data is shorter than the required minimum length.

    Attributes:
        inputdata: length of the data that was supplied.
        atleast: minimum length that was expected.
    '''

    def __init__(self, inputdata, atleast):
        '''Initialise ShortInput with the actual and the minimum size.'''
        # Pass both values to Exception so that `args` stays
        # (inputdata, atleast), as it was before this change.
        super().__init__(inputdata, atleast)
        self.inputdata = inputdata
        self.atleast = atleast

    def __str__(self):
        '''Return the human-readable description of the problem.

        The message is produced here rather than printed from __init__, so
        creating the exception has no output side effect and a handler that
        prints the exception shows readable text instead of a bare tuple.
        '''
        return ("Your input data size is %d bytes which is shorter than "
                "expected data %d bytes" % (self.inputdata, self.atleast))


if __name__ == '__main__':
    # Small demo: show the message carried by the exception.
    print(ShortInput(5, 6))
# Import the exception class from ShortInputModule and show the help text
# generated from its docstrings.
from ShortInputModule import ShortInput
help(ShortInput)
# Import the module itself and show the module-level help as well.
import ShortInputModule
help(ShortInputModule)
from ShortInputModule import ShortInput
# Keep prompting until the entered text is at least six characters long.
while True:
    try:
        data = input("Enter Some data:")
        if len(data) < 6:
            # Too short: signal it with the user-defined exception.
            raise ShortInput(len(data), 6)
    except ShortInput as short_err:
        print("Shortinput Exception Occurred", short_err)
        continue  # ask again
    print("Your data is", data)
    break
NumPy is a library for multidimensional arrays and matrices (two-dimensional arrays).
import numpy as np
# Two plain Python lists holding 1..9 and 11..19.
List1 = [*range(1, 10)]
print(List1)
List2 = [*range(11, 20)]
print(List2)
# "+" on lists concatenates them into one longer list.
List3 = [*List1, *List2]
print(List3)
# The same values as NumPy arrays.
Array1 = np.array(List1)
print(Array1)
Array2 = np.array(List2)
print(Array2)
# "+" on arrays adds element by element instead of concatenating.
Array3 = np.add(Array1, Array2)
print(Array3)
# Number of dimensions, shape and bytes per element, one per line.
print(Array1.ndim, Array1.shape, Array1.itemsize, sep="\n")
# Element data type (shown by the notebook when evaluated as an expression).
Array1.dtype
# Build a 1-D array holding 1..15 and inspect it.
print("\n Create Array of 15 items \n")
Array4 = np.arange(1,16)
print(Array4)
print("Dimension of Array: ", Array4.ndim)
print("Shape of the Array")
print(Array4.shape)
# Rearrange the 15 items into 3 rows of 5 columns.
print("\n Reshape Array")
Array5 = Array4.reshape(3,5)
print(Array5)
print("\n Modified Shape of the Array \n")
print(Array5.shape)
print("\n New Dimension of Array is ",Array5.ndim)
Array5
# Transpose: rows become columns (3x5 -> 5x3).
Array6 = Array5.T
Array6
# First row of Array5
Array5[0]
# First column of Array5
Array5[:,0]
# Second row and second column of Array5 (slices keep the result 2-D)
Array5[1:2,1:2]
# Second row, third and fourth columns. The row slice is 1:2 so that only
# the second row is selected; an open-ended 1: would also include every
# row after it.
SecondRowCols = Array5[1:2, 2:4]
print(SecondRowCols)
# Alternate rows and alternate columns
print("\n Alternate Rows \n ")
print(Array5[::2, :])
print("\nAlternate Columns \n")
print(Array5[:, ::2])
#dir(Array5)
# Aggregates computed over all elements of the array.
Array5.sum()
Array5.max()
Array5.min()
Array5.mean()
Array5.std()
# astype returns a converted array; Array5 itself keeps its dtype.
print("Change Array Type to float32 ")
Array7 = Array5.astype('float32')
print(Array7)
print("\n Array Data Type :")
print(Array7.dtype)
A matrix is a two-dimensional array.
# Wrap the 1-D array in a matrix and print it followed by its dimension count.
Mat1 = np.matrix(Array1)
print(Mat1, Mat1.ndim, sep="\n")
# Same for the 2-D array.
Mat2 = np.matrix(Array5)
print(Mat2, Mat2.ndim, sep="\n")
# Transpose of the matrix
Mat3 = Mat2.transpose()
print(Mat3)
# Inverse of the matrix
# NOTE(review): Array5 is reshaped to 3x5 elsewhere in this file, so Mat2 is
# not square; `.I` on a non-square matrix is expected to give a
# pseudo-inverse rather than a true inverse -- confirm against NumPy docs.
Mat5 = Mat2.I
print(Mat5)
1. Pandas is a software library written for the Python programming language for data manipulation and analysis.
2. DataFrame object for data manipulation with integrated indexing.
3. Tools for reading and writing data between in-memory data structures and different file formats.
4. Data alignment and integrated handling of missing data.
5. Reshaping and pivoting of data sets.
6. Label-based slicing, fancy indexing, and subsetting of large data sets.
7. Data structure column insertion and deletion.
8. Group-by engine allowing split-apply-combine operations on data sets.
9. Data set merging and joining.
10. Hierarchical axis indexing to work with high-dimensional data in a lower-dimensional data structure.
11. Time-series functionality: date range generation and frequency conversion, moving-window statistics, moving-window linear regressions, date shifting and lagging.
12. Provides data filtering.
import pandas as pd
# Sample data: employee names and their salaries, in matching order.
EmpName = ["Amit", "Amar", "Akabar", "Anthony", "Isha", "Disha", "Hema"]
Salary = [200000, 300000, 400000, 500000, 250000, 350000, 100000]

# A Series built from a list gets a default integer index.
Series1 = pd.Series(EmpName)
print("\n Employee Name Series \n", Series1, sep="\n")
Series2 = pd.Series(Salary)
print("\n Employee Salary Series \n ", Series2, sep="\n")
#dir(pd.Series)

# Pair each name with its salary; a Series built from the resulting dict
# uses the names as its index labels.
EmpDict = {name: pay for name, pay in zip(EmpName, Salary)}
print(EmpDict)
EmpSeries = pd.Series(EmpDict)
print(EmpSeries)
# The bare expressions below rely on the notebook displaying their value.
# Index labels, underlying values, and keys() (the index again).
EmpSeries.index
EmpSeries.values
EmpSeries.keys()
# How many times each distinct salary occurs.
EmpSeries.value_counts()
# Look up one entry by its label, then overwrite it in place.
EmpSeries['Amit']
EmpSeries['Amit'] = 300000
EmpSeries
# Dimension count and shape of the Series.
EmpSeries.ndim
EmpSeries.shape
# Simple aggregates over the salaries.
EmpSeries.max()
EmpSeries.mean()
EmpSeries.min()
EmpSeries.sum()
# Element data type (dtype and dtypes are both evaluated here).
EmpSeries.dtype
EmpSeries.dtypes
# Slices: the first three entries, then everything from the third entry on.
EmpSeries[:3]
EmpSeries[2:]
# Turn the Series into a single-column DataFrame.
df = pd.DataFrame(EmpSeries)
# Inspect column labels, row labels, dimension count, dtypes and shape.
df.columns
df.index
df.ndim
df.dtypes
df.shape
# Give the single column a descriptive name.
df.columns=['Salary']
df
# Select the column by key and compute a few aggregates on it.
df['Salary']
df['Salary'].max()
df['Salary'].mean()
df['Salary'].sum()
# Summary statistics of the frame.
df.describe()
# Attribute-style access to the same column, then a plot of it
# (plotting requires a plotting backend such as matplotlib to be installed).
df.Salary
df.Salary.plot()
# Print a summary of the frame's index, columns and dtypes.
df.info()
# Build a two-column frame from (name, salary) pairs; no column names are
# given here, so they are assigned a few lines below.
df1 = pd.DataFrame(zip(EmpName,Salary))
print(df1)
df1.columns
# Replace the column labels with descriptive names.
df1.columns = ['EmpName', 'EmpSalary']
df1
# Inspect shape, row index, summary statistics and structure.
df1.shape
df1.index
df1.describe()
df1.info()
from pathlib import Path

# Write the table to EmpData.csv in the current working directory and read
# the very same file back. A single resolved location is used for both
# steps, so the name cannot differ in letter case between writing and
# reading (which fails on case-sensitive filesystems) and no user-specific
# absolute directory is hard-coded.
csv_file = Path('EmpData.csv').resolve()
df1.to_csv(csv_file, index=False)
# Show the file contents (portable replacement for the shell command `cat`).
print(csv_file.read_text(encoding='utf-8'), end='')
# Show the current working directory (portable replacement for `pwd`).
print(Path.cwd())
# `path` stays a plain string holding the absolute location of the file.
path = str(csv_file)
df2 = pd.read_csv(path)
# Display the frame that was read back from the CSV file.
df2
# Select single columns by name.
df2['EmpName']
df2['EmpSalary']
# Positional row slices: the first three rows, then the first five rows.
df2.iloc[:3]
df2.iloc[0:5]
# Slicing the frame directly with integers also selects rows (first two).
df2[:2]
# Attribute-style access to the EmpName column.
df2.EmpName
# First and last rows of the frame.
df2.head()
df2.tail()
# Sorted views of the data; the results are not assigned, so df2 is unchanged.
df2.sort_values('EmpSalary')
df2.sort_values('EmpName', ascending = False )
# The same data drawn as different chart kinds (requires a plotting backend
# such as matplotlib to be installed).
df2.plot(kind = 'bar')
df2.plot(kind = 'line')
df2.plot.hist()
df2.plot(kind = 'barh')
# Variants that name the label column (x) and the value column (y).
# NOTE(review): not every chart kind may use x= (e.g. pie, hist, box) --
# confirm the resulting charts are labelled as intended.
df2.plot(kind = 'pie',x='EmpName',y='EmpSalary', figsize = (7,5),use_index = True)
df2.plot(kind = 'area',x='EmpName',y='EmpSalary', use_index=True, figsize = (8,8))
df2.plot(kind = 'hist',x='EmpName',y='EmpSalary', figsize = (8,5))
df2.plot(kind = 'box',x='EmpName',y='EmpSalary', figsize = (8,8))