# SAMPLE CODE
# (original source link lost in extraction)
# NOTE(review): the extracted source replaced dotted module paths with a
# "[Link]" placeholder; the imports below are reconstructed from the names
# actually used in this file -- confirm against the original project.

# --- Standard library ---
import os
import pickle
import tkinter
from string import punctuation
from tkinter import *  # star import kept first so later names take precedence, as in the original
from tkinter import filedialog, messagebox, simpledialog, ttk

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, normalize

import keras
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Activation, Dropout, Flatten, LSTM
# --- Main window and module-level shared state ------------------------------
main = Tk()
main.title("DETECTION OF FAKE NEWS THROUGH IMPLEMENTATION OF DATA SCIENCE APPLICATION")
main.geometry("1300x1200")

# State shared across the GUI callbacks below.  The original file used bare
# `global` statements at module scope, which are no-ops; explicit defaults
# are used instead so every name exists before any button is clicked.
filename = None            # path of the uploaded training CSV
X = None                   # feature tensor built by preprocess()
Y = None                   # label vector built by preprocess()
tfidf_X_train = tfidf_X_test = tfidf_y_train = tfidf_y_test = None
tfidf_vectorizer = None    # fitted TfidfVectorizer, reused by predict()
accuracy = None
error = None
classifier = None          # trained or reloaded LSTM model

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
textdata = []              # cleaned news messages (filled by uploadDataset)
labels = []                # integer class labels (0 = genuine per predict())
def cleanPost(doc):
    """Normalize one news message into a cleaned, space-joined token string.

    Pipeline: whitespace-tokenize, strip punctuation, keep alphabetic
    tokens only, drop stop words and single characters, lemmatize.

    :param doc: raw message text (str)
    :return: cleaned tokens joined by single spaces (str)
    """
    tokens = doc.split()
    # Single translation table strips all punctuation in one C-level pass.
    table = str.maketrans('', '', punctuation)
    tokens = [w.translate(table) for w in tokens]
    tokens = [word for word in tokens if word.isalpha()]
    tokens = [w for w in tokens if w not in stop_words]
    tokens = [word for word in tokens if len(word) > 1]
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(tokens)
def uploadDataset():
    """Load a labelled news CSV and accumulate cleaned text/labels.

    Asks the user for a CSV file (expected columns: 'text', 'target'),
    cleans each message with cleanPost(), and appends results to the
    module-level `textdata` / `labels` lists, echoing each row to the GUI.
    """
    global filename
    text.delete('1.0', END)
    filename = filedialog.askopenfilename(initialdir="TwitterNewsData")
    # Reset any previously loaded dataset before re-populating.
    textdata.clear()
    labels.clear()
    dataset = pd.read_csv(filename)
    dataset = dataset.fillna(' ')
    for i in range(len(dataset)):
        # DataFrame.get_value was removed from pandas; .at is the
        # scalar-access replacement (read_csv yields a RangeIndex, so the
        # positional row number is also the label here).
        msg = dataset.at[i, 'text']
        label = dataset.at[i, 'target']
        msg = str(msg).strip().lower()
        labels.append(int(label))
        clean = cleanPost(msg)
        textdata.append(clean)
        text.insert(END, clean + " ==== " + str(label) + "\n")
def preprocess():
    """Vectorize the loaded news text and build the train/test split.

    Fits a TF-IDF vectorizer (top 200 uni/bi-gram features) on `textdata`,
    L2-normalizes the matrix, shuffles, reshapes X to the 3-D
    (samples, features, 1) layout the LSTM expects, and stores an 80/20
    train/test split in the module-level tfidf_* globals.
    """
    text.delete('1.0', END)
    global X, Y
    global tfidf_vectorizer
    global tfidf_X_train, tfidf_X_test, tfidf_y_train, tfidf_y_test
    # The original assigned to a local named `stopwords`, shadowing the
    # imported nltk module; a distinct name avoids that trap.
    stop_list = stopwords.words("english")
    tfidf_vectorizer = TfidfVectorizer(stop_words=stop_list,
                                       use_idf=True, ngram_range=(1, 2),
                                       smooth_idf=False, norm=None,
                                       decode_error='replace', max_features=200)
    tfidf = tfidf_vectorizer.fit_transform(textdata).toarray()
    # get_feature_names() was removed from scikit-learn;
    # get_feature_names_out() returns the same vocabulary.
    df = pd.DataFrame(tfidf, columns=tfidf_vectorizer.get_feature_names_out())
    text.insert(END, str(df))
    print(df.shape)
    values = df.values
    X = normalize(values[:, 0:values.shape[1]])
    Y = np.asarray(labels)
    le = LabelEncoder()
    Y = le.fit_transform(Y)
    # Shuffle samples and labels with the same permutation.
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    X = X[indices]
    Y = Y[indices]
    Y = Y.reshape(-1, 1)
    # (The original instantiated an unused OneHotEncoder here; labels stay
    # integer-encoded for sparse_categorical_crossentropy.)
    X = X.reshape((X.shape[0], X.shape[1], 1))
    print(Y)
    print(X.shape)
    print(Y.shape)
    tfidf_X_train, tfidf_X_test, tfidf_y_train, tfidf_y_test = train_test_split(X, Y, test_size=0.2)
    text.insert(END, "\n\nTotal News found in dataset : " + str(len(X)) + "\n")
    text.insert(END, "Total records used to train machine learning algorithms : " + str(len(tfidf_X_train)) + "\n")
    text.insert(END, "Total records used to test machine learning algorithms : " + str(len(tfidf_X_test)) + "\n")
def runLSTM():
    """Train the LSTM classifier, or reload a previously saved one.

    If model/model.json exists, the saved architecture and weights are
    loaded; otherwise a new network is trained for 10 epochs on (X, Y)
    and its artifacts (weights, json, pickled history) are written to the
    model/ directory.  The last-epoch accuracy is shown in the GUI.
    """
    text.delete('1.0', END)
    global classifier
    if os.path.exists('model/model.json'):
        with open('model/model.json', "r") as json_file:
            loaded_model_json = json_file.read()
        classifier = model_from_json(loaded_model_json)
        classifier.load_weights("model/model_weights.h5")
        # Legacy Keras 2.x thread-safety call; absent from modern
        # tf.keras -- TODO confirm the installed Keras version supports it.
        classifier._make_predict_function()
        print(classifier.summary())
        # Training history pickled at train time; index 9 = last of 10 epochs.
        with open('model/history.pckl', 'rb') as f:  # filename garbled in source -- TODO confirm
            data = pickle.load(f)
        acc = data['accuracy'][9] * 100
        text.insert(END, "LSTM Fake News Detection Accuracy : " + str(acc) + "\n\n")
        text.insert(END, 'LSTM Model Summary can be seen in black console for layer details\n')
        # NOTE(review): the extracted source then re-loaded a pickled model
        # here, overwriting the JSON-loaded classifier; its filename was
        # garbled.  The JSON-loaded model is kept -- confirm which artifact
        # is canonical in the original project.
    else:
        lstm_model = Sequential()
        # Input shape is (features, 1) as produced by preprocess().
        lstm_model.add(LSTM(128, input_shape=(X.shape[1:]),
                            activation='relu', return_sequences=True))
        lstm_model.add(Dropout(0.2))
        lstm_model.add(LSTM(128, activation='relu'))
        lstm_model.add(Dropout(0.2))
        lstm_model.add(Dense(32, activation='relu'))
        lstm_model.add(Dropout(0.2))
        # Two-way softmax head; integer labels with sparse CE loss.
        lstm_model.add(Dense(2, activation='softmax'))
        lstm_model.compile(loss='sparse_categorical_crossentropy',
                           optimizer='adam', metrics=['accuracy'])
        hist = lstm_model.fit(X, Y, epochs=10,
                              validation_data=(tfidf_X_test, tfidf_y_test))
        classifier = lstm_model
        classifier.save_weights('model/model_weights.h5')
        model_json = classifier.to_json()
        with open("model/model.json", "w") as json_file:
            json_file.write(model_json)
        accuracy = hist.history
        with open('model/history.pckl', 'wb') as f:
            pickle.dump(accuracy, f)
        acc = accuracy['accuracy'][9] * 100
        text.insert(END, "LSTM Accuracy : " + str(acc) + "\n\n")
        text.insert(END, 'LSTM Model Summary can be seen in black console for layer details\n')
        print(lstm_model.summary())
def graph():
    """Plot per-epoch training accuracy and loss from the pickled history."""
    # Context manager guarantees the handle closes even if unpickling fails.
    with open('model/history.pckl', 'rb') as f:  # filename garbled in source -- TODO confirm
        data = pickle.load(f)
    acc = data['accuracy']
    loss = data['loss']
    plt.figure(figsize=(10, 6))
    plt.grid(True)
    plt.xlabel('Epochs')  # original label had a typo ('Epcchs')
    plt.ylabel('Accuracy/Loss')
    plt.plot(acc, 'ro-', color='green')
    plt.plot(loss, 'ro-', color='blue')
    plt.legend(['Accuracy', 'Loss'], loc='upper left')
    plt.title('LSTM Model Accuracy & Loss Graph')
    plt.show()
def predict():
    """Classify each news item in a user-selected CSV as GENUINE or FAKE.

    Reads the first column of the chosen CSV as raw news text, cleans and
    TF-IDF-vectorizes each row with the fitted vectorizer, and runs the
    trained LSTM classifier, printing a verdict per row into the GUI.
    """
    testfile = filedialog.askopenfilename(initialdir="TwitterNewsData")
    testData = pd.read_csv(testfile)
    text.delete('1.0', END)
    testData = testData.values
    testData = testData[:, 0]  # first column is assumed to hold the news text -- TODO confirm
    print(testData)
    for i in range(len(testData)):
        msg = testData[i]
        original_msg = testData[i]
        print(msg)
        review = str(msg).strip().lower()
        review = cleanPost(review)
        testReview = tfidf_vectorizer.transform([review]).toarray()
        # The model was trained on 3-D (samples, features, 1) input, so the
        # 2-D tf-idf row must be reshaped before prediction (the extracted
        # source fed it in raw, which cannot match the trained input shape).
        testReview = testReview.reshape((testReview.shape[0], testReview.shape[1], 1))
        probs = classifier.predict(testReview)
        # Softmax output -> class index; comparing the raw probability
        # array to 0 (as the extracted source did) is never true.
        predicted = np.argmax(probs)
        print(predicted)
        if predicted == 0:
            text.insert(END, original_msg + " === Given news predicted as GENUINE\n\n")
        else:
            text.insert(END, original_msg + " == Given news predicted as FAKE\n\n")
# --- GUI layout -------------------------------------------------------------
font = ('times', 15, 'bold')
title = Label(main, text='DETECTION OF FAKE NEWS THROUGH IMPLEMENTATION OF DATA SCIENCE APPLICATION')
title.config(bg='gold2', fg='thistle1')
title.config(font=font)
title.config(height=3, width=120)
title.place(x=0, y=5)

font1 = ('times', 13, 'bold')
ff = ('times', 12, 'bold')

# One button per pipeline stage, laid out down the left edge.
uploadButton = Button(main, text="Upload Fake News Dataset", command=uploadDataset)
uploadButton.place(x=20, y=100)
uploadButton.config(font=ff)

processButton = Button(main, text="Preprocess Dataset", command=preprocess)
processButton.place(x=20, y=150)
processButton.config(font=ff)

dtButton = Button(main, text="Run LSTM Algorithm", command=runLSTM)
dtButton.place(x=20, y=200)
dtButton.config(font=ff)

graphButton = Button(main, text="Accuracy & Loss Graph", command=graph)
graphButton.place(x=20, y=250)
graphButton.config(font=ff)

predictButton = Button(main, text="Test News Detection", command=predict)
predictButton.place(x=20, y=300)
predictButton.config(font=ff)

# Shared output console read/written by every callback above.
font1 = ('times', 12, 'bold')
text = Text(main, height=30, width=100)
scroll = Scrollbar(text)
text.configure(yscrollcommand=scroll.set)
text.place(x=330, y=100)
text.config(font=font1)

main.config(bg='DarkSlateGray1')
main.mainloop()