from urllib.request import urlopen #importing the libraries for…
# Question (Python Programming, DSC 430): I am getting error in this code.
#
# Word-frequency report for a web page: download the page, collect the text
# found in heading/paragraph/list-item tags, count the words and print the
# most frequent ones.

from collections import Counter
from html.parser import HTMLParser
from itertools import chain
from urllib.parse import urljoin
from urllib.request import urlopen  # importing the libraries for html and url operations

# Module-level accumulators.
# website_data: every text fragment collected by any Collector (kept for
#               backward compatibility; per-page text lives on the instance).
# top_values_list: one list of lower-cased words per analyzed page.
website_data = []
top_values_list = []


class Collector(HTMLParser):
    """HTML parser that gathers hyperlinks and text, plus word-count helpers.

    Args:
        url: Address of the page being parsed; relative ``href`` values are
            resolved against it.
    """

    # Text is collected only while the most recently opened tag is one of these.
    approved_tags = ["h1", "h2", "h3", "h4", "h5", "h6", "p", "li"]

    # Not used by any method in this listing; kept so existing references to
    # ``Collector.visited`` keep working.
    visited = set()

    def __init__(self, url):
        HTMLParser.__init__(self)
        self.url = url
        self.links = []       # absolute http(s) links found in <a href=...>
        self.data = []        # text fragments collected by this instance
        self.html_tag = None  # name of the most recently opened tag

    def handle_starttag(self, tag, attrs):
        """Remember the opened tag and record the link of every ``<a href>``."""
        self.html_tag = tag
        if tag != "a":
            return
        for name, value in attrs:
            # A bare ``href`` attribute has value None; joining it would
            # wrongly yield the base URL itself, so skip it.
            if name != "href" or not value:
                continue
            absolute = urljoin(self.url, value)
            if absolute.startswith("http"):
                self.links.append(absolute)

    def getLinks(self):
        """Return the list of absolute links collected so far."""
        return self.links

    def handle_data(self, data):
        """Collect *data* when the most recently opened tag is approved."""
        if self.html_tag in Collector.approved_tags:
            # Stored per instance so that analyzing a second page does not
            # re-count the text of the first one.
            self.data.append(data)
            website_data.append(data)

    def getData(self):
        """Return the text fragments collected by this instance."""
        return self.data

    def content_cleanUp(self, content):
        """Split every string in *content* on whitespace and lower-case it.

        Returns:
            A flat list of lower-cased words, in their original order.
        """
        return [word.lower() for text in content for word in text.split()]

    def analyze(self, url):
        """Download *url*, extract its words and append them to the totals.

        Returns:
            The module-level ``top_values_list``, which holds one list of
            words for every page analyzed so far.
        """
        # ``with`` closes the connection; the declared charset is honoured
        # and undecodable bytes are replaced instead of aborting the run.
        with urlopen(url) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            content = response.read().decode(charset, errors="replace")

        # A fresh parser per page keeps this instance's own state untouched.
        page = Collector(url)
        page.feed(content)
        page.close()

        clean_content = self.content_cleanUp(page.getData())
        top_values_list.append(clean_content)
        return top_values_list

    def frequency(self, top_values_list):
        """Count how often each word occurs across all the given word lists.

        Args:
            top_values_list: Iterable of word lists.

        Returns:
            A dict mapping each word to its number of occurrences, ordered
            by first appearance.
        """
        return dict(Counter(chain.from_iterable(top_values_list)))

    def find_top_words(self, counterV, limit=25):
        """Pick the *limit* most frequent words from a word -> count mapping.

        Ranking happens once, after all counts are known. Words with equal
        counts keep the order they have in *counterV*.

        Returns:
            A ``(top_words, top_values)`` pair of parallel lists, most
            frequent first.
        """
        ranked = sorted(
            counterV.items(), key=lambda item: item[1], reverse=True
        )[:limit]
        top_words = [word for word, _ in ranked]
        top_values = [count for _, count in ranked]
        return top_words, top_values

    def display(self, top_words, top_values):
        """Print each word next to its count, one entry per (spaced) line."""
        for word, count in zip(top_words, top_values):
            print("\n{:50} {:5}".format(word, count))


def main():
    """Analyze the main url and print its most frequent words."""
    url = "https://www.cdm.depaul.edu/Pages/default.aspx"
    collector = Collector(url)
    word_lists = collector.analyze(url)
    counterV = collector.frequency(word_lists)
    top_words, top_values = collector.find_top_words(counterV)
    collector.display(top_words, top_values)


if __name__ == "__main__":
    main()


