Project 1
pip install tensorflow-datasets
Requirement already satisfied: tensorflow-datasets in c:\users\bharg\anaconda3\lib\site-packages (4.9.4)
[... remaining pip output trimmed: every dependency of tensorflow-datasets was already satisfied ...]
Note: you may need to restart the kernel to use updated packages.
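One quick sanity check after installing (and restarting the kernel if needed) is to print the package version from the current session:

import tensorflow_datasets as tfds
print(tfds.__version__)  # should report 4.9.4, matching the pip output above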
import tensorflow as tf
import tensorflow_datasets as tfds

# Load the CIFAR-100 dataset
(train_ds, test_ds), info = tfds.load('cifar100', split=['train', 'test'], with_info=True)

# Preprocess the data
def preprocess_data(sample):
    # Resize (a no-op for CIFAR-100, whose images are already 32x32) and scale to [0, 1]
    image = tf.image.resize(sample['image'], (32, 32)) / 255.0
    # One-hot encode the 100 fine labels
    label = tf.one_hot(sample['label'], depth=100)
    return image, label

train_ds = train_ds.map(preprocess_data).shuffle(1000).batch(64)
test_ds = test_ds.map(preprocess_data).batch(64)

# Build the model (an Input layer is preferred over passing input_shape
# to the first Conv2D, which newer Keras versions warn about)
model = tf.keras.Sequential([
    tf.keras.Input(shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(100, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(train_ds, epochs=10, validation_data=test_ds)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_ds)
print(f'Test accuracy: {test_acc}')
Epoch 1/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 17s 18ms/step - accuracy: 0.0484 - loss: 4.2915 - val_accuracy: 0.1532 - val_loss: 3.5971
Epoch 2/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.1759 - loss: 3.4663 - val_accuracy: 0.2183 - val_loss: 3.2293
Epoch 3/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.2337 - loss: 3.1450 - val_accuracy: 0.2553 - val_loss: 3.0379
Epoch 4/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 14s 17ms/step - accuracy: 0.2725 - loss: 2.9479 - val_accuracy: 0.2803 - val_loss: 2.9086
Epoch 5/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.3032 - loss: 2.7833 - val_accuracy: 0.2920 - val_loss: 2.8498
Epoch 6/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.3274 - loss: 2.6570 - val_accuracy: 0.3246 - val_loss: 2.6955
Epoch 7/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.3482 - loss: 2.5506 - val_accuracy: 0.3382 - val_loss: 2.6332
Epoch 8/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.3655 - loss: 2.4663 - val_accuracy: 0.3504 - val_loss: 2.5762
Epoch 9/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.3859 - loss: 2.3841 - val_accuracy: 0.3444 - val_loss: 2.5976
Epoch 10/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 17ms/step - accuracy: 0.3957 - loss: 2.3275 - val_accuracy: 0.3557 - val_loss: 2.5423
157/157 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - accuracy: 0.3511 - loss: 2.5547
Test accuracy: 0.35569998621940613
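Roughly 36% top-1 accuracy over 100 classes is modest but plausible for a CNN this small trained for 10 epochs. A common first step toward higher accuracy is light data augmentation plus prefetching in the tf.data pipeline, applied before model.fit. A minimal sketch, with the flip/translation layers and their factors being illustrative choices rather than part of the original run:

import tensorflow as tf

# Illustrative augmentation block; active only when called with training=True
augment = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomTranslation(0.1, 0.1),
])

# Augment training batches on the fly and overlap preprocessing with training
train_ds = (train_ds
            .map(lambda x, y: (augment(x, training=True), y),
                 num_parallel_calls=tf.data.AUTOTUNE)
            .prefetch(tf.data.AUTOTUNE))
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)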
Project 2
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the Sentiment140 dataset
data = pd.read_csv("C:\\Users\\bharg\\Downloads\\archive (2)\\training.1600000.processed.noemoticon.csv",
                   encoding="ISO-8859-1", header=None,
                   names=["target", "ids", "date", "flag", "user", "text"])

# Drop unnecessary columns
data.drop(["ids", "date", "flag", "user"], axis=1, inplace=True)

# Clean the text data
def clean_text(text):
    # Remove URLs, mentions, and hashtags
    text = re.sub(r"http\S+|@\S+|#\S+", "", text)
    # Remove special characters and punctuation
    text = re.sub(r"[^a-zA-Z0-9]", " ", text)
    # Convert to lowercase
    text = text.lower()
    # Collapse extra whitespace
    text = re.sub(r"\s+", " ", text).strip()
    return text

data["text"] = data["text"].apply(clean_text)

# Encode target labels. Sentiment140 marks tweets 0 = negative, 2 = neutral,
# 4 = positive; the training file contains no neutral tweets, so after this
# mapping only classes 0 and 2 appear in the results below.
data["target"] = data["target"].replace({0: 0, 2: 1, 4: 2})

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    data["text"], data["target"], test_size=0.2, random_state=42)

# Feature extraction using TF-IDF
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Train a Logistic Regression model
logistic_reg = LogisticRegression(max_iter=1000)
logistic_reg.fit(X_train_tfidf, y_train)

# Evaluate the model
y_pred = logistic_reg.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
Accuracy: 0.79061875
              precision    recall  f1-score   support

           0       0.80      0.78      0.79    159494
           2       0.78      0.80      0.79    160506

    accuracy                           0.79    320000
   macro avg       0.79      0.79      0.79    320000
weighted avg       0.79      0.79      0.79    320000

Confusion Matrix:
[[124040  35454]
 [ 31548 128958]]
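About 79% accuracy is a reasonable bag-of-words baseline on this data. A quick sanity check for a model like this is to inspect which TF-IDF features carry the most weight. A small sketch against the fitted objects above, where top_n is an arbitrary choice:

import numpy as np

# With two classes, scikit-learn fits a single coefficient row:
# large positive values push toward the positive class (2),
# large negative values toward the negative class (0).
feature_names = tfidf_vectorizer.get_feature_names_out()
coefs = logistic_reg.coef_[0]
top_n = 10
print("Most positive terms:", feature_names[np.argsort(coefs)[-top_n:]])
print("Most negative terms:", feature_names[np.argsort(coefs)[:top_n]])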
Project 3
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# One-time NLTK resources for tokenization and stopword removal:
# nltk.download('punkt')
# nltk.download('stopwords')

# Load the dataset
# (read_csv here assumes the file has columns Id, Review, Rating as used below;
# the raw Amazon-reviews train.ft.txt ships in fastText "__label__" format and
# would need to be converted to that layout first)
data = pd.read_csv("C:\\Users\\bharg\\OneDrive\\Desktop\\train.ft.txt")

# Drop unnecessary columns
data.drop(["Id"], axis=1, inplace=True)

# Build the stopword set once rather than on every call
stop_words = set(stopwords.words('english'))

# Clean the text data
def clean_text(text):
    # Remove special characters and punctuation
    text = re.sub(r"[^a-zA-Z0-9]", " ", text)
    # Convert to lowercase
    text = text.lower()
    # Tokenize the text
    tokens = word_tokenize(text)
    # Remove stopwords
    filtered_tokens = [word for word in tokens if word not in stop_words]
    # Join tokens back into a single string
    return " ".join(filtered_tokens)

data["Review"] = data["Review"].apply(clean_text)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    data["Review"], data["Rating"], test_size=0.2, random_state=42)

# Create a pipeline for text classification
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])

# Train the model
text_clf.fit(X_train, y_train)

# Evaluate the model
y_pred = text_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
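Because the whole workflow lives in a single Pipeline, scoring unseen text only requires repeating the same cleaning step before calling predict. A short usage sketch, with made-up sample reviews for illustration:

# Hypothetical unseen reviews, cleaned the same way as the training data
new_reviews = [
    "This product exceeded my expectations, works perfectly",
    "Broke after two days, complete waste of money",
]
cleaned = [clean_text(r) for r in new_reviews]
print(text_clf.predict(cleaned))  # predicted Rating labels, one per review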