from collections import Counter
import re
# Report the seven most frequent words in the file.
# The path is a raw string so the backslash stays a literal path separator
# instead of starting the invalid escape sequence "\s" (a SyntaxWarning on
# Python 3.12+). The resulting path value is identical to the original.
with open(r'd:\sitemap.php') as source_file:
    # The context manager closes the handle as soon as the content is read.
    # Lowercasing up front makes the count case-insensitive.
    text = source_file.read().lower()

# \w+ splits on anything that is not a letter, digit or underscore, so
# markup punctuation is dropped and tag/attribute names count as words.
words = re.findall(r'\w+', text)
cw = Counter(words).most_common(7)
print(cw)
'''
run:
[('a', 24), ('td', 24), ('keyword', 17), ('php', 17), ('href', 13), ('query', 12), ('index', 12)]
'''