Skip to content

Commit

Permalink
fix: update model package for notebook demo
Browse files Browse the repository at this point in the history
  • Loading branch information
howl-anderson committed Mar 12, 2020
1 parent cc2047d commit 8ed1437
Showing 1 changed file with 74 additions and 87 deletions.
161 changes: 74 additions & 87 deletions notebooks/demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,27 +55,14 @@
}
],
"source": [
"!pip install -q https://github.com/howl-anderson/Chinese_models_for_SpaCy/releases/download/v2.0.3/zh_core_web_sm-2.0.3.tar.gz"
"!pip install -q https://github.com/howl-anderson/Chinese_models_for_SpaCy/releases/download/v2.2.X-0.1.0/zh_core_web_sm-0.1.0.tar.gz"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
"Collecting jieba\n",
"Installing collected packages: jieba\n",
"Successfully installed jieba-0.39\n",
"\u001b[33mYou are using pip version 10.0.1, however version 18.0 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"outputs": [],
"source": [
"!pip install -q pandas\n",
"!pip install -q jieba\n",
Expand All @@ -91,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -111,7 +98,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -127,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -136,8 +123,8 @@
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache /tmp/jieba.cache\n",
"Loading model cost 0.588 seconds.\n",
"Prefix dict has been built succesfully.\n"
"Loading model cost 0.435 seconds.\n",
"Prefix dict has been built successfully.\n"
]
}
],
Expand All @@ -154,7 +141,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"scrolled": false
},
Expand Down Expand Up @@ -206,26 +193,26 @@
" <td>xxx</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>B</td>\n",
" <td>PERSON</td>\n",
" <td>0.392991</td>\n",
" <td>0.000000</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>在</td>\n",
" <td>在</td>\n",
" <td>X</td>\n",
" <td>VERB</td>\n",
" <td>VV</td>\n",
" <td>acl</td>\n",
" <td>case</td>\n",
" <td>x</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>O</td>\n",
" <td></td>\n",
" <td>7.318524</td>\n",
" <td>6.573987</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -234,30 +221,30 @@
" <td>北京</td>\n",
" <td>X</td>\n",
" <td>NNP</td>\n",
" <td>det</td>\n",
" <td>nmod</td>\n",
" <td>xx</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>B</td>\n",
" <td>GPE</td>\n",
" <td>10.940736</td>\n",
" <td>12.769391</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>的</td>\n",
" <td>的</td>\n",
" <td>X</td>\n",
" <td>PART</td>\n",
" <td>DEC</td>\n",
" <td>case:dec</td>\n",
" <td>x</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>O</td>\n",
" <td></td>\n",
" <td>6.201293</td>\n",
" <td>6.886564</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
Expand All @@ -266,21 +253,21 @@
" <td>清华大学</td>\n",
" <td>X</td>\n",
" <td>NNP</td>\n",
" <td>obj</td>\n",
" <td>obl</td>\n",
" <td>xxxx</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>B</td>\n",
" <td>GPE</td>\n",
" <td>12.044737</td>\n",
" <td>ORG</td>\n",
" <td>18.842812</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>读书</td>\n",
" <td>读书</td>\n",
" <td>X</td>\n",
" <td>VERB</td>\n",
" <td>VV</td>\n",
" <td>ROOT</td>\n",
" <td>xx</td>\n",
Expand All @@ -289,32 +276,32 @@
" <td>True</td>\n",
" <td>O</td>\n",
" <td></td>\n",
" <td>11.602811</td>\n",
" <td>18.138533</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text lemma_ pos_ tag_ dep_ shape_ is_alpha is_stop has_vector \\\n",
"0 王小明 王小明 X NNP nsubj xxx True False True \n",
"1 在 在 X VV acl x True False True \n",
"2 北京 北京 X NNP det xx True False True \n",
"3 的 的 X DEC case:dec x True False True \n",
"4 清华大学 清华大学 X NNP obj xxxx True False True \n",
"5 读书 读书 X VV ROOT xx True False True \n",
" text lemma_ pos_ tag_ dep_ shape_ is_alpha is_stop has_vector \\\n",
"0 王小明 王小明 X NNP nsubj xxx True False False \n",
"1 在 在 VERB VV case x True True True \n",
"2 北京 北京 X NNP nmod xx True False True \n",
"3 的 的 PART DEC case:dec x True True True \n",
"4 清华大学 清华大学 X NNP obl xxxx True False True \n",
"5 读书 读书 VERB VV ROOT xx True False True \n",
"\n",
" ent_iob_ ent_type_ vector_norm is_oov \n",
"0 B PERSON 0.392991 True \n",
"1 O 7.318524 False \n",
"2 B GPE 10.940736 False \n",
"3 O 6.201293 False \n",
"4 B GPE 12.044737 False \n",
"5 O 11.602811 False "
"0 B PERSON 0.000000 True \n",
"1 O 6.573987 False \n",
"2 B GPE 12.769391 False \n",
"3 O 6.886564 False \n",
"4 B ORG 18.842812 False \n",
"5 O 18.138533 False "
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -346,83 +333,83 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" id=\"0\" class=\"displacy\" width=\"1100\" height=\"574.5\" style=\"max-width: none; height: 574.5px; color: #000000; background: #ffffff; font-family: Arial\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"zh\" id=\"c3b1235515374dfd9468f0e90464c5a9-0\" class=\"displacy\" width=\"1100\" height=\"487.0\" direction=\"ltr\" style=\"max-width: none; height: 487.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">王小明</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">X</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">在</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">X</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VERB</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">北京</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">X</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">的</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">X</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">PART</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">清华大学</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">X</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">读书</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">X</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">VERB</tspan>\n",
"</text>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-0-0\" stroke-width=\"2px\" d=\"M70,439.5 C70,2.0 925.0,2.0 925.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-c3b1235515374dfd9468f0e90464c5a9-0-0\" stroke-width=\"2px\" d=\"M70,352.0 C70,2.0 925.0,2.0 925.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-0-0\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
" <textPath xlink:href=\"#arrow-c3b1235515374dfd9468f0e90464c5a9-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M70,441.5 L62,429.5 78,429.5\" fill=\"currentColor\"/>\n",
" <path class=\"displacy-arrowhead\" d=\"M70,354.0 L62,342.0 78,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-0-1\" stroke-width=\"2px\" d=\"M245,439.5 C245,89.5 920.0,89.5 920.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-c3b1235515374dfd9468f0e90464c5a9-0-1\" stroke-width=\"2px\" d=\"M245,352.0 C245,89.5 745.0,89.5 745.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-0-1\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">acl</textPath>\n",
" <textPath xlink:href=\"#arrow-c3b1235515374dfd9468f0e90464c5a9-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">case</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M245,441.5 L237,429.5 253,429.5\" fill=\"currentColor\"/>\n",
" <path class=\"displacy-arrowhead\" d=\"M245,354.0 L237,342.0 253,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-0-2\" stroke-width=\"2px\" d=\"M420,439.5 C420,264.5 735.0,264.5 735.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-c3b1235515374dfd9468f0e90464c5a9-0-2\" stroke-width=\"2px\" d=\"M420,352.0 C420,177.0 740.0,177.0 740.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-0-2\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
" <textPath xlink:href=\"#arrow-c3b1235515374dfd9468f0e90464c5a9-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nmod</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M420,441.5 L412,429.5 428,429.5\" fill=\"currentColor\"/>\n",
" <path class=\"displacy-arrowhead\" d=\"M420,354.0 L412,342.0 428,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-0-3\" stroke-width=\"2px\" d=\"M420,439.5 C420,352.0 555.0,352.0 555.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-c3b1235515374dfd9468f0e90464c5a9-0-3\" stroke-width=\"2px\" d=\"M420,352.0 C420,264.5 560.0,264.5 560.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-0-3\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">case:dec</textPath>\n",
" <textPath xlink:href=\"#arrow-c3b1235515374dfd9468f0e90464c5a9-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">case:dec</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M555.0,441.5 L563.0,429.5 547.0,429.5\" fill=\"currentColor\"/>\n",
" <path class=\"displacy-arrowhead\" d=\"M560.0,354.0 L568.0,342.0 552.0,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-0-4\" stroke-width=\"2px\" d=\"M245,439.5 C245,177.0 740.0,177.0 740.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-c3b1235515374dfd9468f0e90464c5a9-0-4\" stroke-width=\"2px\" d=\"M770,352.0 C770,264.5 910.0,264.5 910.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-0-4\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">obj</textPath>\n",
" <textPath xlink:href=\"#arrow-c3b1235515374dfd9468f0e90464c5a9-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">obl</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M740.0,441.5 L748.0,429.5 732.0,429.5\" fill=\"currentColor\"/>\n",
" <path class=\"displacy-arrowhead\" d=\"M770,354.0 L762,342.0 778,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"</svg>"
],
Expand All @@ -431,7 +418,7 @@
]
},
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
Expand All @@ -440,26 +427,26 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div class=\"entities\" style=\"line-height: 2.5\">\n",
"<mark class=\"entity\" style=\"background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
"<div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n",
"<mark class=\"entity\" style=\"background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" 王小明\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">PERSON</span>\n",
"</mark>\n",
"\n",
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" 北京\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
"</mark>\n",
"\n",
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
" 清华大学\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
"</mark>\n",
"读书</div>"
],
Expand All @@ -468,7 +455,7 @@
]
},
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
Expand All @@ -493,7 +480,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 8ed1437

Please sign in to comment.