Visualisation of The Data - Jupyter Notebook
Visualisation of The Data - Jupyter Notebook
In [2]: df = pd.read_excel('2001_final.xlsx')
In [3]: df
In [4]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2229 entries, 0 to 2228
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ad_observation_id 2229 non-null object
1 depth 2229 non-null float64
2 temperature 2229 non-null float64
3 salinity 2229 non-null float64
4 density 2229 non-null float64
5 ao_wmo_number 2229 non-null int64
6 latitude 2229 non-null float64
7 longitude 2229 non-null float64
8 date 2229 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(6), int64(1), object(1)
memory usage: 156.9+ KB
In [5]: df1=df
In [6]: df1
In [7]: df1=df1.drop_duplicates()
df1
In [8]: df=df[['date','temperature','salinity']]
In [9]: df
date
In [12]: sns.pairplot(df1)
In [ ]:
In [13]: df1.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2229 entries, 0 to 2228
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ad_observation_id 2229 non-null object
1 depth 2229 non-null float64
2 temperature 2229 non-null float64
3 salinity 2229 non-null float64
4 density 2229 non-null float64
5 ao_wmo_number 2229 non-null int64
6 latitude 2229 non-null float64
7 longitude 2229 non-null float64
8 date 2229 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(6), int64(1), object(1)
memory usage: 238.7+ KB
In [16]: df1
200
1214 5.2 27.520 36.029 1023.327026 2900080 5.023 63.625 12-
04:38:
200
1740 5.3 27.904 36.040 1023.210999 2900080 4.910 63.375 12-
05:08:
200
1844 5.4 28.274 36.081 1023.119995 2900080 4.754 63.104 12-
05:41:
200
1110 5.5 27.749 36.145 1023.340027 2900080 5.139 63.844 12-
05:41:
200
333 6.5 28.999 36.061 1022.864014 2900164 5.286 59.843 11-
22:38:
200
1038 2007.5 2.735 34.792 1027.743042 2900164 5.685 59.218 12-
22:29:
200
1598 2007.6 2.997 34.800 1027.725952 2900167 6.857 62.363 12-
20:49:
200
686 2008.6 2.829 34.796 1027.738037 2900164 5.556 59.597 11-
22:31:
200
545 2009.9 2.903 34.802 1027.735962 2900168 9.313 60.849 11-
23:02:
200
967 2010.8 2.967 34.799 1027.728027 2900167 7.089 62.598 12-
20:55:
In [17]:
# Assuming df1 is your DataFrame containing "salinity" and "temperature" co
plt.figure(figsize=(13, 9))
# Scatter plot with colors based on salinity and temperature
scatter = plt.scatter(df1["salinity"], df1["temperature"], s=65, c=df1["sal
plt.xlabel('Salinity', fontsize=25)
plt.ylabel('Temperature', fontsize=25)
plt.title('Salinity vs Temperature', fontsize=25)
# Adding colorbar to show the mapping of colors to salinity values
#cbar = plt.colorbar(scatter)
#cbar.set_label('Salinity', fontsize=20)
plt.show()
In [18]: df1
200
1214 5.2 27.520 36.029 1023.327026 2900080 5.023 63.625 12-
04:38:
200
1740 5.3 27.904 36.040 1023.210999 2900080 4.910 63.375 12-
05:08:
200
1844 5.4 28.274 36.081 1023.119995 2900080 4.754 63.104 12-
05:41:
200
1110 5.5 27.749 36.145 1023.340027 2900080 5.139 63.844 12-
05:41:
200
333 6.5 28.999 36.061 1022.864014 2900164 5.286 59.843 11-
22:38:
200
1038 2007.5 2.735 34.792 1027.743042 2900164 5.685 59.218 12-
22:29:
200
1598 2007.6 2.997 34.800 1027.725952 2900167 6.857 62.363 12-
20:49:
200
686 2008.6 2.829 34.796 1027.738037 2900164 5.556 59.597 11-
22:31:
200
545 2009.9 2.903 34.802 1027.735962 2900168 9.313 60.849 11-
23:02:
200
967 2010.8 2.967 34.799 1027.728027 2900167 7.089 62.598 12-
20:55:
Out[20]: array([[36.029],
[36.04 ],
[36.081],
...,
[34.796],
[34.802],
[34.799]])
Out[21]: array([[27.52 ],
[27.904],
[28.274],
...,
[ 2.829],
[ 2.903],
[ 2.967]])
In [ ]:
In [23]: lin_reg=LinearRegression()
In [24]: lin_reg=LinearRegression()
lin_reg.fit(temp,salt)
Out[24]: LinearRegression()
In [25]: sns.set(font_scale=1)
plt.figure(figsize=(15, 15))
plt.scatter(temp,salt,s=65)
plt.plot(temp,lin_reg.predict(temp), color='red', linewidth='2')
plt.xlabel('Temperature',fontsize=25)
plt.ylabel('Salinity',fontsize=25)
plt.title('salinity prediction using temperature',fontsize=25)
plt.show()
Out[31]: LinearRegression()
Out[32]: array([[65.63395657]])
Out[33]: LinearRegression()
Out[34]: array([[65.63395657]])
In [37]: sns.set(font_scale=2.0)
plt.figure(figsize=(13, 9))
x_grid = np.arange(min(salt), max(salt), 0.1)
x_grid = x_grid.reshape(-1,1)
plt.scatter(salt,temp,s=65)
plt.plot(x_grid,lin_reg2.predict(pol.fit_transform(x_grid)) , color='red',
plt.xlabel('Slt',fontsize=25)
plt.ylabel('Temp',fontsize=25)
plt.title('salt degerlerine gore temp tahmin gosterimi',fontsize=25)
plt.show()
In [38]: x=df.drop(['salinity'],axis=1)
y=df[['salinity']]
In [ ]:
In [39]: x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
Out[40]: DecisionTreeRegressor()
In [ ]:
In [42]:
# Create Decision Tree Regressor and fit the model
tree_reg = DecisionTreeRegressor()
tree_reg.fit(temp, salt)
# Set seaborn font scale
sns.set(font_scale=2.0)
# Create a new figure
plt.figure(figsize=(13, 9))
# Create a grid for smoother plot
x_grid = np.arange(min(temp), max(temp), 0.1).reshape(-1, 1)
# Scatter plot
plt.scatter(temp, salt, s=65)
# Plot Decision Tree Regression line
plt.plot(x_grid, tree_reg.predict(x_grid), color='red', linewidth=5)
# Set labels and title
plt.xlabel('Temperature', fontsize=25)
plt.ylabel('Salinity', fontsize=25)
plt.title('Salinity Prediction based on Temperature (Decision Tree Regressi
# Show the plot
plt.show()
C:\Users\shiny\AppData\Local\Temp\ipykernel_22996\2504878549.py:4: DataCo
nversionWarning: A column-vector y was passed when a 1d array was expecte
d. Please change the shape of y to (n_samples,), for example using ravel
().
rf_reg.fit(x_train,y_train)
In [45]:
# Create Random Forest Regressor and fit the model
forest_reg = RandomForestRegressor(n_estimators=100, random_state=42)
forest_reg.fit(temp, salt)
# Set seaborn font scale
sns.set(font_scale=2.0)
# Create a new figure
plt.figure(figsize=(13, 9))
# Create a grid for smoother plot
x_grid = np.arange(min(temp), max(temp), 0.1).reshape(-1, 1)
# Scatter plot
plt.scatter(temp, salt, s=65)
# Plot Random Forest Regression line
plt.plot(x_grid, forest_reg.predict(x_grid), color='red', linewidth=5)
# Set labels and title
plt.xlabel('Temperature', fontsize=25)
plt.ylabel('Salinity', fontsize=25)
plt.title('Salinity Prediction based on Temperature (Random Forest Regressi
# Show the plot
plt.show()
C:\Users\shiny\AppData\Local\Temp\ipykernel_22996\311321806.py:3: DataCon
versionWarning: A column-vector y was passed when a 1d array was expecte
d. Please change the shape of y to (n_samples,), for example using ravel
().
forest_reg.fit(temp, salt)
In [ ]:
In [ ]:
In [ ]: