D7net
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
usr
/
local
/
ssl
/
local
/
lib
/
python3.6
/
site-packages
/
spacy_lookups_data
/
tests
/
Filename :
test_en.py
back
Copy
# coding: utf-8
from __future__ import unicode_literals

from spacy.lang.en import English
from spacy.tokens import Doc
import pytest


@pytest.fixture(scope="session")
def en_nlp():
    """Provide an ``English()`` instance, built once per test session."""
    nlp = English()
    return nlp


@pytest.mark.parametrize("text", ["faster", "fastest", "better", "best"])
def test_en_lemmatizer_handles_irreg_adverbs(en_nlp, text):
    """Check that the first token's lemma of each irregular form is "fast" or "well"."""
    doc = en_nlp(text)
    first_lemma = doc[0].lemma_
    assert first_lemma in ["fast", "well"]


def test_issue4104(en_nlp):
    """Check the lemmas assigned to "dry", "spun" and "spun-dry".

    The Doc is built directly from the vocab with pre-split words, and the
    expected mapping is:
        dry      -> dry
        spun     -> spin
        spun-dry -> spin-dry
    """
    words = ["dry", "spun", "spun-dry"]
    doc = Doc(en_nlp.vocab, words=words)
    lemmas = [token.lemma_ for token in doc]
    assert lemmas == ["dry", "spin", "spin-dry"]


@pytest.mark.parametrize(
    "text,norms",
    [
        ("I'm", ["i", "am"]),
        ("shan't", ["shall", "not"]),
    ],
)
def test_en_tokenizer_norm_exceptions(en_nlp, text, norms):
    """Check that each token's ``norm_`` for a contraction matches ``norms``."""
    doc = en_nlp(text)
    observed_norms = [token.norm_ for token in doc]
    assert observed_norms == norms