NLTK健康领域英文文本分词、词性标注、词频统计
import re import numpy as np import pandas as pd import nltk.tokenize as tk import nltk.corpus as nc handel_file = 'health_handel.csv' #分词好要保存的数据文件路径 #读取数据 data=pd.read_excel('health.xlsx') print(data.head(10)) stopwords = nc.stopwords.words('english') #停用词 tokenizer=tk.WordPunctTokenizer() #分词器
用户评论