How to create a word cloud from a corpus in Python?
In the question "Creating a subset of words from a corpus in R", the answerer easily converts a term-document matrix
into a word cloud.
Is there a similar function in a Python library that turns a raw text file, an NLTK
corpus, or a Gensim
MmCorpus into a word cloud?
The result will look somewhat like this:
Solution 1:
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
stopwords = set(STOPWORDS)


def show_wordcloud(data, title=None):
    """Render *data* as a word cloud and display it with matplotlib.

    Args:
        data: Text to visualise. A ``str`` is used as-is. Any other
            non-mapping iterable (list of documents, generator, pandas
            Series, ...) is flattened by joining ``str(item)`` for every
            item with spaces, so iterating it must yield the text pieces
            (a DataFrame iterates over column labels -- pass one column).
            Anything else falls back to ``str(data)``.
        title: Optional figure title; skipped when falsy.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # Function-scope import so the snippet needs no extra top-level import.
    from collections.abc import Iterable, Mapping

    if isinstance(data, (str, bytes, Mapping)) or not isinstance(data, Iterable):
        text = str(data)
    else:
        # str() on a container produces its repr, not its contents. For a
        # long pandas Series that repr elides rows with "..." and appends
        # "Name: ..., dtype: ..." metadata, which would pollute the cloud.
        text = ' '.join(str(item) for item in data)

    cloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1,  # fixed seed keeps the layout reproducible
    ).generate(text)

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        # NOTE(review): top=2.3 puts the axes top beyond the figure edge;
        # presumably tuned for notebook inline rendering -- confirm.
        fig.subplots_adjust(top=2.3)

    plt.imshow(cloud)
    plt.show()
# Example usage. Samsung_Reviews_Negative / Samsung_Reviews_positive are not
# defined in this snippet -- presumably pandas DataFrames with a 'Reviews'
# text column; confirm against the data being loaded.
show_wordcloud(Samsung_Reviews_Negative['Reviews'])
show_wordcloud(Samsung_Reviews_positive['Reviews'])
Solution 2:
If you need to show these word clouds on a website or in a web app, you can convert your data to JSON or CSV format and load it into a JavaScript visualisation library such as D3 (see "Word Clouds on d3").
If not, Marcin's answer is a good way to do what you describe.
Solution 3:
Example of amueller's code in action
In a command line / terminal:
sudo pip install wordcloud
Then run the Python script:
## Simple WordCloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
text = 'all your base are belong to us all of your base base base'


def generate_wordcloud(text, stopwords=None):
    """Build a word cloud from *text* and save it as ``WordCloud.png``.

    Args:
        text: The string to visualise.
        stopwords: Optional set of words to leave out of the cloud.
            Defaults to ``{'to', 'of'}``; pass e.g. ``STOPWORDS`` from the
            ``wordcloud`` package for a full English stop-word list.

    Returns:
        None. The image is written to ``WordCloud.png`` in the current
        working directory.
    """
    # Function-scope import so the snippet needs no extra top-level import.
    import os

    if stopwords is None:
        stopwords = {'to', 'of'}

    # Verdana lives at this path only on macOS. On other systems let
    # WordCloud fall back to its default font instead of failing to open
    # a file that does not exist.
    verdana = '/Library/Fonts/Verdana.ttf'
    font_path = verdana if os.path.exists(verdana) else None

    cloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        relative_scaling=1.0,
        stopwords=stopwords,
    ).generate(text)

    plt.figure(1, figsize=(8, 4))
    plt.imshow(cloud)
    plt.axis('off')

    # Swap for plt.show() to display interactively instead of saving.
    plt.savefig("WordCloud.png")
# Run the example on the module-level sample `text`.
generate_wordcloud(text)
Solution 4:
Here is the short code:
# make word cloud
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
stopwords = set(STOPWORDS)


def show_wordcloud(data, title=None):
    """Draw a word cloud for *data* and show it in a matplotlib figure.

    Args:
        data: Source text. Strings are passed through unchanged. Other
            non-mapping iterables (lists, generators, pandas Series, ...)
            are joined item by item with spaces, so iteration must yield
            the text pieces (a DataFrame iterates over column labels --
            pass one column). Any other object is converted with
            ``str(data)``.
        title: Optional title drawn above the cloud; ignored when falsy.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # Function-scope import so the snippet needs no extra top-level import.
    from collections.abc import Iterable, Mapping

    keep_as_str = isinstance(data, (str, bytes, Mapping))
    if keep_as_str or not isinstance(data, Iterable):
        text = str(data)
    else:
        # str() of a container is its repr rather than its contents; a
        # long pandas Series repr elides rows with "..." and ends with
        # "Name: ..., dtype: ..." metadata that would end up in the cloud.
        text = ' '.join(str(item) for item in data)

    cloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1,  # fixed seed keeps the layout reproducible
    ).generate(text)

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        # NOTE(review): top=2.3 puts the axes top beyond the figure edge;
        # presumably tuned for notebook inline rendering -- confirm.
        fig.subplots_adjust(top=2.3)

    plt.imshow(cloud)
    plt.show()
# Script entry point: render the cloud only when the file is run directly.
# NOTE(review): text_str is not defined in this snippet -- supply the text
# to plot before running.
if __name__ == "__main__":
    show_wordcloud(text_str)