{"id":848,"date":"2019-02-07T17:24:23","date_gmt":"2019-02-07T08:24:23","guid":{"rendered":"https:\/\/dong1lkim.oboki.net\/?p=848"},"modified":"2019-09-01T22:23:32","modified_gmt":"2019-09-01T13:23:32","slug":"pyspark-dataframe%ec%9d%84-python-%ec%9e%90%eb%a3%8c%ed%98%95dict%ec%9c%bc%eb%a1%9c-%eb%b3%80%ed%98%95%ed%95%98%ea%b8%b0","status":"publish","type":"post","link":"https:\/\/oboki.net\/workspace\/python\/pyspark-dataframe%ec%9d%84-python-%ec%9e%90%eb%a3%8c%ed%98%95dict%ec%9c%bc%eb%a1%9c-%eb%b3%80%ed%98%95%ed%95%98%ea%b8%b0\/","title":{"rendered":"[PySpark] dataframe\uc744 python \uc790\ub8cc\ud615(dict)\uc73c\ub85c \ubcc0\ud615\ud558\uae30"},"content":{"rendered":"<h1>[PySpark] dataframe\uc744 python \uc790\ub8cc\ud615(dict)\uc73c\ub85c \ubcc0\ud615\ud558\uae30<\/h1>\n<p>spark driver \uc5d0\uc11c \ub370\uc774\ud130\ub97c \ubc14\ub85c \uc0ac\uc6a9\ud558\ub294 \uacbd\uc6b0, dataframe\uc740 \ub2e4\ub8e8\uae30\uac00 \uc5b4\ub824\uc6b4 \uac83 \uac19\ub2e4. dataframe\uc758 <code>collect()<\/code>\uc640 <code>asDict()<\/code>\ub97c \uc774\uc6a9\ud558\uba74 Python \uc790\ub8cc\ud615\uc73c\ub85c \ubcc0\ud658\ud560 \uc218 \uc788\ub2e4.<\/p>\n<h2>dataframe \uc0dd\uc131<\/h2>\n<p><code>pyspark<\/code>\uc5d0\uc11c elasticsearch index\ub97c \uc870\ud68c\ud574 dataframe\uc744 \uc0dd\uc131\ud55c\ub2e4.<\/p>\n<pre><code class=\"py\">&gt;&gt;&gt; from pyspark.sql import SQLContext\n>&gt;&gt; sqlContext = SQLContext(sc)\n>&gt;&gt; df = sqlContext.read.format(\"org.elasticsearch.spark.sql\").option(\"es.nodes\",\"192.168.179.141:9200\").option(\"es.nodes.discovery\", \"true\").load(\"${INDEX}\/${TYPE}\")\n>&gt;&gt; df.registerTempTable(\"tab\")\n>&gt;&gt; df = sqlContext.sql(\"SELECT distinct request FROM tab\")\n>&gt;&gt; type(df)\n&lt;class 'pyspark.sql.dataframe.DataFrame'&gt;\n<\/code><\/pre>\n<p>type\uc774 \uc704\uc640 \uac19\uc774 <code>&lt;class 'pyspark.sql.dataframe.DataFrame'&gt;<\/code> \uc73c\ub85c \uc870\ud68c\ub418\uace0 spark driver\uc5d0\uc11c \ubcc0\ud615\ud558\uae30\uac00 \uc5b4\ub835\ub2e4.<\/p>\n<h2>collect()<\/h2>\n<p>\ub2e4\uc74c\uacfc \uac19\uc774 <code>collect()<\/code> \uba54\uc18c\ub4dc\ub97c \uc0ac\uc6a9\ud558\uba74, \uc6d0\uc18c\uac00 <code>&lt;class 'pyspark.sql.types.Row'&gt;<\/code>\uc778 <code>list<\/code> \uc790\ub8cc\ud615\uc73c\ub85c \ubc14\ub010\ub2e4.<\/p>\n<pre><code class=\"py\">&gt;&gt;&gt; collected = df.collect()\n>&gt;&gt; type(collected)                                                             \n&lt;class 'list'&gt;\n>&gt;&gt; type(collected[0])\n&lt;class 'pyspark.sql.types.Row'&gt;\n>&gt;&gt; type(collected[0])\n&lt;class 'pyspark.sql.types.Row'&gt;\n>&gt;&gt; collected\n[Row(request='\/issue\/findRelationIssues.do'), Row(request='\/product\/findProductPrefixList.do'), Row(request='\/tagfree\/xfreeEditor\/dialog\/splitcell.js'), Row(request='\/issue\/saveCoreSetComment.do'), Row(request='\/issue\/js\/common\/jquery-1.9.1.min.js'), Row(request='\/js\/floatMenu.js'), Row(request='\/main.do\/favicon.ico'), Row(request='\/tagfree\/xfreeEditor\/js\/xfe_mobile_info.js'), Row(request='\/tagfree\/xfreeEditor\/dialog\/insertlayout.js'), Row(request='\/dwr\/exec\/ProductDwr.findSearchSubVersions'), Row(request='\/js\/version.js'), Row(request='\/patch\/patchVeriForm.do'), Row(request='\/comment\/removeComment.do'), Row(request='\/knowledge\/knowledgeList.do'), Row(request='\/product\/productList.do'), Row(request='\/dwr\/interface\/IssueDwr.js'), Row(request='\/dwr\/exec\/UserProfileDwr.saveUserIssueColumns'), Row(request='\/patch\/patchSearchList.do'), Row(request='\/dwr\/engine.js'), Row(request='\/sso\/ssoLogout.jsp'), Row(request='\/checkIssuesAjax.do'), Row(request='\/manager\/managerList.do'), Row(request='\/sso\/js\/\/.js'), Row(request='\/tagfree\/xfreeEditor\/dialog\/shortcut.js'), Row(request='\/issue\/popupUrgent.do'), Row(request='\/innorix\/common\/upload.jsp'), Row(request='\/board\/readBoard.do'), Row(request='\/html\/eng_manual\/manualForm.jsp'), Row(request='\/tagfree\/xfreeEditor\/dialog\/medialink.js'), Row(request='\/dwr\/interface\/RequirementDwr.js'), Row(request='\/tagfree\/xfreeEditor\/js\/library\/rangy\/rangy-highlighter.js'), Row(request='\/js\/commentForm.js'), Row(request='\/tagfree\/xfreeEditor\/js\/xfe_env_contents.js'), Row(request='\/dwr\/exec\/UserDwr.findUsersByName'), Row(request='\/dwr\/interface\/UserDwr.js'), Row(request='\/tagfree\/xfreeEditor\/dialog\/paste.js'), Row(request='\/tagfree\/xfreeEditor\/dialog\/cellproperty.js'), Row(request='\/tagfree\/xfreeEditor\/js\/xfe_create_dialog.js'), Row(request='\/tagfree\/xfreeEditor\/js\/xfe_bottom_event.js'), Row(request='\/util\/saveFilteredIssueList.do'), Row(request='\/auth\/login.do'), Row(request='\/html\/OpenFrame\/Common\/html\/'), Row(request='\/sso\/ssologin.jsp'), Row(request='\/tagfree\/xfreeEditor\/dialog\/colorpicker.js'), Row(request='\/dwr\/exec\/UserDwr.findUsersByNameAll'), Row(request='\/board\/boardList.do'), Row(request='\/module\/moduleList.do'), Row(request='\/dwr\/exec\/ProductDwr.findSearchMainVersions'), Row(request='\/issue\/issueSearchList.do'), Row(request='\/tagfree\/xfreeEditor\/js\/xfe_range_handler.js'), Row(request='\/issue\/js\/common\/global.js')]\n<\/code><\/pre>\n<h2>asDict()<\/h2>\n<p><code>&lt;class 'pyspark.sql.types.Row'&gt;<\/code> \ud0c0\uc785\uc5d0 <code>asDict()<\/code> \uba54\uc18c\ub4dc\ub97c \uc0ac\uc6a9\ud558\uba74 dictionary \uc790\ub8cc\ud615\uc73c\ub85c \ubcc0\ud615\ud560 \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"py\">&gt;&gt;&gt; collected_first = collected[0].asDict()\n>&gt;&gt; type(collected_first)\n&lt;class 'dict'&gt;\n>&gt;&gt; collected_first\n{'request': '\/issue\/findRelationIssues.do'}\n>&gt;&gt; \n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>[PySpark] dataframe\uc744 python \uc790\ub8cc\ud615(dict)\uc73c\ub85c \ubcc0\ud615\ud558\uae30 spark driver \uc5d0\uc11c \ub370\uc774\ud130\ub97c \ubc14\ub85c \uc0ac\uc6a9\ud558\ub294 \uacbd\uc6b0, dataframe\uc740 \ub2e4\ub8e8\uae30\uac00 \uc5b4\ub824\uc6b4 \uac83 \uac19\ub2e4. dataframe\uc758 collect()\uc640 asDict()\ub97c \uc774\uc6a9\ud558\uba74 Python \uc790\ub8cc\ud615\uc73c\ub85c \ubcc0\ud658\ud560 \uc218 \uc788\ub2e4. dataframe \uc0dd\uc131 pyspark\uc5d0\uc11c elasticsearch index\ub97c \uc870\ud68c\ud574 dataframe\uc744 \uc0dd\uc131\ud55c\ub2e4. &gt;&gt;&gt; from pyspark.sql import SQLContext >&gt;&gt; sqlContext = SQLContext(sc) >&gt;&gt; df = sqlContext.read.format(&#8220;org.elasticsearch.spark.sql&#8221;).option(&#8220;es.nodes&#8221;,&#8221;192.168.179.141:9200&#8243;).option(&#8220;es.nodes.discovery&#8221;, &#8220;true&#8221;).load(&#8220;${INDEX}\/${TYPE}&#8221;) >&gt;&gt; df.registerTempTable(&#8220;tab&#8221;) >&gt;&gt; df = sqlContext.sql(&#8220;SELECT distinct [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[10,73],"tags":[103,34,102],"class_list":["post-848","post","type-post","status-publish","format-standard","hentry","category-python","category-spark","tag-dataframe","tag-python","tag-spark"],"_links":{"self":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/848","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/comments?post=848"}],"version-history":[{"count":4,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/848\/revisions"}],"predecessor-version":[{"id":1356,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/848\/revisions\/1356"}],"wp:attachment":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/media?parent=848"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/categories?post=848"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/tags?post=848"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}