Here is my Python code:
import string
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Word-frequency analysis of Hamlet.txt: a bar chart of the 20 most common
# words (most frequent bar in red) and a word cloud built from every word.

# reading the file
# The encoding is passed explicitly: without it open() uses the platform
# default codec (cp1252 in the traceback below), which cannot decode byte
# 0x9d and raises UnicodeDecodeError.
with open('Hamlet.txt', encoding='utf-8') as f:
    lines = [line.rstrip() for line in f]

# removing punctuation
# The translation table does not depend on the line, so build it once.
# NOTE(review): string.punctuation is ASCII-only; typographic quotes/dashes
# in the text (if any) stay attached to words -- confirm whether that matters.
strip_punctuation = str.maketrans('', '', string.punctuation)
sentences = [sent.translate(strip_punctuation).split() for sent in lines if sent]
words = [word for sentence in sentences for word in sentence]  # collecting all the words
print(words[:5])  # preview; a bare `words[:5]` expression displays nothing here

# frequency of items in a list
word_freq = Counter(words)
top_word_freq = word_freq.most_common(20)
top_words = [word for word, _ in top_word_freq]  # all the top frequent words
freq = [count for _, count in top_word_freq]  # all the frequencies

# max freq in red color in bar plot
# The bar chart gets its own figure so the word cloud drawn afterwards does
# not overwrite it on the same axes.
plt.figure()
bar_plot = plt.bar(top_words, freq)
if freq:
    # most_common() sorts by descending count, so the first bar is the maximum.
    bar_plot[0].set_color('red')
plt.xticks(rotation=45)

# creating a string of all the words
# str.join builds the text in one pass instead of repeated `+=` in a loop.
comment_words = " ".join(words)
word_cloud = WordCloud(collocations=False, background_color='white').generate(comment_words)

# Display the generated Word Cloud
plt.figure()
plt.imshow(word_cloud, interpolation='bilinear')
plt.axis("off")
plt.show()
I am getting the following error:
UnicodeDecodeError                        Traceback (most recent call last)
Cell In[34], line 8
      6 # reading the file
      7 with open('Hamlet.txt') as f:
----> 8     lines = [line.rstrip() for line in f]
     10 # removing punctuation
     11 s = [sent.translate(str.maketrans('', '', string.punctuation)).split() for sent in lines if sent != '']
Cell In[34], line 8, in <listcomp>(.0)
      6 # reading the file
      7 with open('Hamlet.txt') as f:
----> 8     lines = [line.rstrip() for line in f]
     10 # removing punctuation
     11 s = [sent.translate(str.maketrans('', '', string.punctuation)).split() for sent in lines if sent != '']
File anaconda3\Lib\encodings\cp1252.py:23, in IncrementalDecoder.decode(self, input, final)
     22 def decode(self, input, final=False):
---> 23     return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 2331: character maps to <undefined>