{"id":636,"date":"2018-10-26T11:30:14","date_gmt":"2018-10-26T02:30:14","guid":{"rendered":"https:\/\/dong1lkim.oboki.net\/?p=636"},"modified":"2019-09-01T22:20:24","modified_gmt":"2019-09-01T13:20:24","slug":"elasticsearch-%ed%95%9c%ea%b8%80-%ed%98%95%ed%83%9c%ec%86%8c-%eb%b6%84%ec%84%9d%ea%b8%b0-nori","status":"publish","type":"post","link":"https:\/\/oboki.net\/workspace\/data-engineering\/elasticsearch\/elasticsearch-%ed%95%9c%ea%b8%80-%ed%98%95%ed%83%9c%ec%86%8c-%eb%b6%84%ec%84%9d%ea%b8%b0-nori\/","title":{"rendered":"[ElasticSearch] \ud55c\uae00 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30 nori"},"content":{"rendered":"<h1>\ud55c\uae00 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30 nori<\/h1>\n<h2>\uc18c\uac1c<\/h2>\n<h2>\uc124\uce58<\/h2>\n<p><code>elasticsearch-plugin<\/code> \uc774\uc6a9\ud574\uc11c \uc124\uce58. \ubaa8\ub4e0 \ub178\ub4dc\uc5d0 \uc124\uce58 \ub418\uc5b4\uc57c\ud558\uace0 \uc124\uce58 \uc774\ud6c4\uc5d0\ub294 \ub178\ub4dc \ubcc4 \uc7ac\uae30\ub3d9\uc774 \ud544\uc694.<\/p>\n<pre><code class=\"bash\">elasticsearch-plugin install analysis-nori\n<\/code><\/pre>\n<h2>Analysis<\/h2>\n<p>\ud615\ud0dc\uc18c \ubd84\uc11d\uc744 \uc801\uc6a9\ud574\ubcf4\uace0 \uc2f6\uc740 text \uc5d0 \ub300\ud574 \ub2e4\uc74c\uacfc \uac19\uc774 analysis \ud14c\uc2a4\ud2b8\uac00 \uac00\ub2a5\ud558\ub2e4.<\/p>\n<pre><code class=\"json\">curl -X GET \"$HOSTNAME:9200\/_analyze?pretty\" -H 'Content-Type: application\/json' -d'\n{\n  \"tokenizer\": \"nori_tokenizer\",\n  \"text\": \"\ubfcc\ub9ac\uac00 \uae4a\uc740 \ub098\ubb34\ub294\",\n  \"attributes\" : [\"posType\", \"leftPOS\", \"rightPOS\", \"morphemes\", \"reading\"],\n  \"explain\": true\n}\n'\n<\/code><\/pre>\n<p>\ub2e4\uc74c\uacfc \uac19\uc774 \ubd84\uc11d \uacb0\uacfc \ubc18\ud658\ud574\uc900\ub2e4.<\/p>\n<pre><code class=\"json\">{\n  \"detail\" : {\n    \"custom_analyzer\" : true,\n    \"charfilters\" : [ ],\n    \"tokenizer\" : {\n      \"name\" : \"nori_tokenizer\",\n      \"tokens\" : [\n        {\n          \"token\" : 
\"\ubfcc\ub9ac\",\n          \"start_offset\" : 0,\n          \"end_offset\" : 2,\n          \"type\" : \"word\",\n          \"position\" : 0,\n          \"leftPOS\" : \"NNG(General Noun)\",\n          \"morphemes\" : null,\n          \"posType\" : \"MORPHEME\",\n          \"reading\" : null,\n          \"rightPOS\" : \"NNG(General Noun)\"\n        },\n        {\n          \"token\" : \"\uac00\",\n          \"start_offset\" : 2,\n          \"end_offset\" : 3,\n          \"type\" : \"word\",\n          \"position\" : 1,\n          \"leftPOS\" : \"J(Ending Particle)\",\n          \"morphemes\" : null,\n          \"posType\" : \"MORPHEME\",\n          \"reading\" : null,\n          \"rightPOS\" : \"J(Ending Particle)\"\n        },\n        {\n          \"token\" : \"\uae4a\",\n          \"start_offset\" : 4,\n          \"end_offset\" : 5,\n          \"type\" : \"word\",\n          \"position\" : 2,\n          \"leftPOS\" : \"VA(Adjective)\",\n          \"morphemes\" : null,\n          \"posType\" : \"MORPHEME\",\n          \"reading\" : null,\n          \"rightPOS\" : \"VA(Adjective)\"\n        },\n        {\n          \"token\" : \"\uc740\",\n          \"start_offset\" : 5,\n          \"end_offset\" : 6,\n          \"type\" : \"word\",\n          \"position\" : 3,\n          \"leftPOS\" : \"E(Verbal endings)\",\n          \"morphemes\" : null,\n          \"posType\" : \"MORPHEME\",\n          \"reading\" : null,\n          \"rightPOS\" : \"E(Verbal endings)\"\n        },\n        {\n          \"token\" : \"\ub098\ubb34\",\n          \"start_offset\" : 7,\n          \"end_offset\" : 9,\n          \"type\" : \"word\",\n          \"position\" : 4,\n          \"leftPOS\" : \"NNG(General Noun)\",\n          \"morphemes\" : null,\n          \"posType\" : \"MORPHEME\",\n          \"reading\" : null,\n          \"rightPOS\" : \"NNG(General Noun)\"\n        },\n        {\n          \"token\" : \"\ub294\",\n          \"start_offset\" : 9,\n          \"end_offset\" : 10,\n      
    \"type\" : \"word\",\n          \"position\" : 5,\n          \"leftPOS\" : \"J(Ending Particle)\",\n          \"morphemes\" : null,\n          \"posType\" : \"MORPHEME\",\n          \"reading\" : null,\n          \"rightPOS\" : \"J(Ending Particle)\"\n        }\n      ]\n    },\n    \"tokenfilters\" : [ ]\n  }\n}\n<\/code><\/pre>\n<h2>Analyzer \uc801\uc6a9<\/h2>\n<h3>index \uc0dd\uc131<\/h3>\n<p>\uc544\ub798\uc640 \uac19\uc774 custom analyzer \ub97c \uc0dd\uc131\ud558\uace0, \ud574\ub2f9 analyzer \ub85c \ud615\ud0dc\uc18c \ubd84\uc11d\uc744 \uc801\uc6a9\ud558\uace0\uc790 \ud558\ub294 text \ud544\ub4dc\uc5d0 custom analyzer \ub97c \uc9c0\uc815\ud574\uc900\ub2e4.<\/p>\n<pre><code class=\"json\">curl -X PUT \"$HOSTNAME:9200\/_template\/template_ims?pretty\" -H 'Content-Type: application\/json' -d' \n{\n  \"template\" : \"nori-ims\",\n    \"settings\": {\n        \"index\": {\n            \"analysis\": {\n                \"analyzer\": {\n                    \"nori_korean\":{\n                    \"type\": \"custom\",\n                    \"tokenizer\": \"nori_tokenizer\"\n                    }\n                }\n            }\n        }\n    },\n  \"mappings\" : { \n    \"doc\": {\n      \"properties\": {\n        \"issue_title\": {\n        \"type\": \"text\",\n        \"analyzer\": \"nori_korean\"\n        },\n        \"issue_number\": {\n          \"type\": \"integer\"\n        },\n        .\n        .\n        .\n        \"Closed Date\": {\n          \"type\": \"date\",\n          \"format\": \"yyyy\/MM\/dd HH:mm:ss\"\n        },\n        \"issue_details\": {\n          \"type\": \"text\",\n          \"analyzer\": \"nori_korean\"\n        },\n        \"actions\": {\n          \"type\": \"text\",\n          \"analyzer\": \"nori_korean\"\n        }\n      }\n    }\n  }\n}\n'\n<\/code><\/pre>\n<p>\ud574\ub2f9 \uc778\ub371\uc2a4\uc5d0 \ub370\uc774\ud130\ub97c \uc778\ub371\uc2f1 \ud55c \ud6c4, \ud2b9\uc815 _id \uac12\uc758 \ud14d\uc2a4\ud2b8\uac00 \ubd84\uc11d 
\ub418\uc5c8\ub294\uc9c0 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"bash\">curl -X POST \"$HOSTNAME:9200\/target_index_name\/target_doc_name\/doc_id\/_termvector?fields=field_name&amp;pretty=true\"\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\ud55c\uae00 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30 nori \uc18c\uac1c \uc124\uce58 elasticsearch-plugin \uc774\uc6a9\ud574\uc11c \uc124\uce58. \ubaa8\ub4e0 \ub178\ub4dc\uc5d0 \uc124\uce58 \ub418\uc5b4\uc57c\ud558\uace0 \uc124\uce58 \uc774\ud6c4\uc5d0\ub294 \ub178\ub4dc \ubcc4 \uc7ac\uae30\ub3d9\uc774 \ud544\uc694. elasticsearch-plugin install analysis-nori Analysis \ud615\ud0dc\uc18c \ubd84\uc11d\uc744 \uc801\uc6a9\ud574\ubcf4\uace0 \uc2f6\uc740 text \uc5d0 \ub300\ud574 \ub2e4\uc74c\uacfc \uac19\uc774 analysis \ud14c\uc2a4\ud2b8\uac00 \uac00\ub2a5\ud558\ub2e4. curl -X GET &#8220;$HOSTNAME:9200\/_analyze?pretty&#8221; -H &#8216;Content-Type: application\/json&#8217; -d&#8217; { &#8220;tokenizer&#8221;: &#8220;nori_tokenizer&#8221;, &#8220;text&#8221;: &#8220;\ubfcc\ub9ac\uac00 \uae4a\uc740 \ub098\ubb34\ub294&#8221;, &#8220;attributes&#8221; : [&#8220;posType&#8221;, &#8220;leftPOS&#8221;, &#8220;rightPOS&#8221;, 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[12],"tags":[25,135,93],"class_list":["post-636","post","type-post","status-publish","format-standard","hentry","category-elasticsearch","tag-elasticsearch","tag-nlp","tag-93"],"_links":{"self":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/636","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/comments?post=636"}],"version-history":[{"count":3,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/636\/revisions"}],"predecessor-version":[{"id":1202,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/636\/revisions\/1202"}],"wp:attachment":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/media?parent=636"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/categories?post=636"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/tags?post=636"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}