@zhengyuhong
2015-04-09T08:06:55.000000Z
字数 659
阅读 1130
NLTK Python 文本挖掘 NLP
>>> import nltk
>>> sentence = """At eight o'clock on Thursday morning
... Arthur didn't feel very good."""
>>> tokens = nltk.word_tokenize(sentence)
>>> tokens
['At', 'eight', "o'clock", 'on', 'Thursday', 'morning',
'Arthur', 'did', "n't", 'feel', 'very', 'good', '.']
>>> tagged = nltk.pos_tag(tokens)
>>> tagged[0:6]
[('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
('Thursday', 'NNP'), ('morning', 'NN')]
>>> entities = nltk.chunk.ne_chunk(tagged)
>>> entities
Tree('S', [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'),
           ('on', 'IN'), ('Thursday', 'NNP'), ('morning', 'NN'),
       Tree('PERSON', [('Arthur', 'NNP')]),
           ('did', 'VBD'), ("n't", 'RB'), ('feel', 'VB'),
           ('very', 'RB'), ('good', 'JJ'), ('.', '.')])
