"""Tally referendum comment categories and plot the repeated ones.

Reads ``referendum-categories.csv``, counts how often each distinct comment
occurs, writes the comments seen exactly once to ``ones.csv`` and every other
comment to ``comments.csv``, prints how many comments occurred exactly once,
and plots the counts of the remaining comments.
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

data = pd.read_csv("referendum-categories.csv")

# Strip surrounding whitespace so that otherwise identical comments are
# counted as the same category.
data['respondent'] = data['respondent'].str.strip()
data['comment'] = data['comment'].str.strip()

# One row per distinct comment with its number of occurrences.
# value_counts() sorts by count in descending order, which is the ranking the
# plot below relies on. rename_axis/reset_index(name=...) pin the column
# names to 'comment' and 'count' regardless of how the pandas version in use
# names the value_counts() result.
cv = data['comment'].value_counts()
counts = cv.rename_axis('comment').reset_index(name='count')

# Split with boolean masks instead of growing two DataFrames row by row with
# pd.concat: a single pass, and both frames always carry the 'comment' and
# 'count' columns even when one of them ends up with no rows.
is_single = counts['count'] == 1
ones = counts[is_single].reset_index(drop=True)            # categories of count = 1
cat_and_value = counts[~is_single].reset_index(drop=True)  # all other categories

ones.to_csv("ones.csv")
cat_and_value.to_csv("comments.csv")

print(len(ones))

# A plot for category counts where the count > 1
plt.plot(cat_and_value['count'], marker='o')
plt.title("Ranked most popular category count")
plt.ylabel("Category Count")
plt.show()