{"id":1490,"date":"2020-02-07T23:55:39","date_gmt":"2020-02-07T14:55:39","guid":{"rendered":"https:\/\/oboki.net\/?p=1490"},"modified":"2020-02-11T00:06:47","modified_gmt":"2020-02-10T15:06:47","slug":"parquet_fallback_schema_resolution-%ec%84%a4%ec%a0%95%ea%b3%bc-parquet-%ed%85%8c%ec%9d%b4%eb%b8%94%ec%9d%98-schema-evolution","status":"publish","type":"post","link":"https:\/\/oboki.net\/workspace\/data-engineering\/database\/impala\/parquet_fallback_schema_resolution-%ec%84%a4%ec%a0%95%ea%b3%bc-parquet-%ed%85%8c%ec%9d%b4%eb%b8%94%ec%9d%98-schema-evolution\/","title":{"rendered":"[Impala] PARQUET_FALLBACK_SCHEMA_RESOLUTION \uc124\uc815\uacfc Parquet \ud14c\uc774\ube14\uc758 Schema Evolution"},"content":{"rendered":"<p>\ud558\ub098\uc758 Impala \ud14c\uc774\ube14\uc740 \uc5ec\ub7ec\uac1c\uc758 Parquet \ud30c\uc77c\ub85c \uad6c\uc131\ub420 \uc218 \uc788\ub294\ub370, \uc774\ub54c \uac01 \ud30c\ucf00\uc774 \ud30c\uc77c\ub4e4\uc758 \uceec\ub7fc \uc21c\uc11c\uac00 \ub2e4\ub97c \uc218\uac00 \uc788\ub2e4. \uc784\ud314\ub77c\ub9cc\uc744 \uc774\uc6a9\ud574\uc11c \ud30c\ucf00\uc774 \ud30c\uc77c\uc744 \uc0dd\uc131\ud588\ub2e4 \ud558\ub354\ub77c\ub3c4 \uae30\uc874\uc5d0 \uc5c6\ub358 \uceec\ub7fc\uc774 \uc774\ud6c4\uc5d0 rename \ub418\ub294 \uacbd\uc6b0 \ub610\ub294 \ub2e4\ub978 \ubc29\uc2dd\uc73c\ub85c \uc0dd\uc131\ub41c \ud30c\ucf00\uc774 \ud30c\uc77c\uc744 impala \uc5d0\uc11c \ucd94\uac00\ub85c \ub85c\ub4dc\ud558\ub294 \uacbd\uc6b0\uc5d0 \uc774\ub7f0 \uc0c1\ud669\uc774 \ubc1c\uc0dd\ud560 \uc218 \uc788\uc744 \uac83 \uac19\ub2e4.<\/p>\n<p>\uc5b4\uca0b\ub4e0 \ub2e4\uc74c\uacfc \uac19\uc740 \uc5d0\ub7ec\uac00 \ubc1c\uc0dd\ud558\ub294 \uacbd\uc6b0<\/p>\n<blockquote>\n<p>File &#8216;hdfs:\/\/quickstart.cloudera:8020\/user\/hive\/warehouse\/schema_evolution_test\/9246f35fb0c26132-8930434600000000_1994605024_data.0.parq&#8217; has an incompatible Parquet schema for column &#8216;default.schema_evolution_test.c1&#8217;. 
Column type: STRING, Parquet schema: optional int32 c2 [i:0 d:1 r:0]<\/p>\n<\/blockquote>\n<p>\uc544\ub798 \uc138\ud305\uc744 \ubcc0\uacbd\ud574\uc8fc\ub294 \uac83\uc73c\ub85c \ud574\uacb0\ud560 \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"language-sql\">set PARQUET_FALLBACK_SCHEMA_RESOLUTION=name;<\/code><\/pre>\n<p>\uc704 \uc124\uc815\uc740 \ub2e4\uc74c\uacfc \uac19\uc774 \ub450 \uac1c\uc758 \uac12\uc744 \uac00\uc9c8 \uc218 \uc788\ub294\ub370, \uae30\ubcf8\uac12\uc740 <code>position<\/code> \uc774\ub2e4.<\/p>\n<ul>\n<li>PARQUET_FALLBACK_SCHEMA_RESOLUTION\n<ul>\n<li>position<\/li>\n<li>name<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h2>CDH Demo Container \ud658\uacbd<\/h2>\n<p>\ud14c\uc2a4\ud2b8 \ud658\uacbd\uc740 \uc544\ub798 \ub9c1\ud06c\uc758 CDH 5.x \ub370\ubaa8 \ubc84\uc804 docker \uc774\ubbf8\uc9c0\ub97c \uc774\uc6a9\ud588\uc73c\uba70<\/p>\n<p><a href=\"https:\/\/downloads.cloudera.com\/demo_vm\/docker\/cloudera-quickstart-vm-5.13.0-0-beta-docker.tar.gz\">Download CDH-5.13.0 demo docker image<\/a><\/p>\n<p>\ucee8\ud14c\uc774\ub108 \uc0dd\uc131\uc740, docker import \uba85\ub839\uc744 \uc774\uc6a9\ud574\uc11c \uc774\ubbf8\uc9c0\ub97c \ubd88\ub7ec\uc628 \ub4a4<\/p>\n<pre><code class=\"language-bash\">curl -O https:\/\/downloads.cloudera.com\/demo_vm\/docker\/cloudera-quickstart-vm-5.13.0-0-beta-docker.tar.gz\ntar -xvzf cloudera-quickstart-vm-*-docker.tar.gz\ndocker import - cloudera\/quickstart:5.13 &lt; cloudera-quickstart-vm-*-docker\/*.tar<\/code><\/pre>\n<p>\ub2e4\uc74c\uacfc \uac19\uc740 \uba85\ub839\uc73c\ub85c \uc0dd\uc131\ud588\ub2e4. 
Impala \ud14c\uc2a4\ud2b8\ub97c \uc704\ud574\uc11c\ub294 <code>Hue<\/code> \ud658\uacbd\ub9cc \uc788\uc73c\uba74 \ucda9\ubd84\ud558\uae30 \ub54c\ubb38\uc5d0 8888 \ud3ec\ud2b8\ub9cc \ub9e4\ud551\ud55c\ub2e4.<\/p>\n<pre><code class=\"language-bash\">docker run \\\n--name cloudera.cluster.01 \\\n--hostname=quickstart.cloudera \\\n--privileged=true -t -i -d \\\n-p 8888:8888 \\\ncloudera\/quickstart:5.13 \/usr\/bin\/docker-quickstart<\/code><\/pre>\n<h3>Windows \ud658\uacbd\uc778 \uacbd\uc6b0<\/h3>\n<p>Mac\/Linux \uac00 \uc544\ub2cc Windows \ud658\uacbd\uc758 \uacbd\uc6b0 \uc774\ubbf8\uc9c0\ub97c \ubd88\ub7ec\uc62c \ub54c <code>type<\/code>\uc744 \uc774\uc6a9\ud574\uc11c \ud45c\uc900 \uc785\ub825\uc73c\ub85c \ub123\uc5b4\uc8fc\uba74 \ub41c\ub2e4.<\/p>\n<pre><code class=\"language-cmd\">type cloudera-quickstart-vm-5.13.0-0-beta-docker.tar | docker import - cloudera\/quickstart:5.13<\/code><\/pre>\n<h2>PARQUET_FALLBACK_SCHEMA_RESOLUTION \uad00\ub828 \ud14c\uc2a4\ud2b8<\/h2>\n<blockquote>\n<p>incompatible parquet schema \uc5d0\ub7ec \uc7ac\ud604 \ubc0f PARQUET_FALLBACK_SCHEMA_RESOLUTION \ubcc0\uacbd \ud14c\uc2a4\ud2b8 \uc2dc\ub098\ub9ac\uc624<\/p>\n<\/blockquote>\n<h3>\ud14c\uc2a4\ud2b8 \ud658\uacbd<\/h3>\n<p><code>Hue<\/code>\ub294 \uc6f9\ube0c\ub77c\uc6b0\uc800\uc5d0\uc11c <code>http:\/\/localhost:8888<\/code> \uacbd\ub85c\uc5d0 \uc811\uadfc\ud55c \ub4a4, <code>id\/pass<\/code>: <code>cloudera\/cloudera<\/code> \ub85c \ub85c\uadf8\uc778\ud558\uba74 \ub41c\ub2e4.<\/p>\n<h3>Impala \uc7ac\ud604 \uc2a4\ud06c\ub9bd\ud2b8<\/h3>\n<p><code>Hue<\/code> \uc5d0\ub514\ud130\uac00 \uc815\uc0c1\uc801\uc73c\ub85c \ub5b4\uc73c\uba74 \uc544\ub798 \uc2a4\ud06c\ub9bd\ud2b8 \uc21c\uc11c\ub300\ub85c \uc218\ud589<\/p>\n<pre><code class=\"language-sql\">-- \ud14c\uc2a4\ud2b8\ub97c \uc704\ud55c \ud14c\uc774\ube14 \uc0dd\uc131\ncreate table default.schema_evolution_test (\n    c1 string\n  , c2 int\n) stored as parquet;\n\n-- \uceec\ub7fc \uc21c\uc11c\uac00 \ud14c\uc2a4\ud2b8 
\ud14c\uc774\ube14\uacfc \ubc18\ub300\uc778 Parquet \ud30c\uc77c \uc0dd\uc131\uc744 \uc704\ud55c \uc784\uc2dc \ud14c\uc774\ube14 \uc0dd\uc131\ncreate table default.schema_evolution_test_tmp (\n    c2 int\n  , c1 string\n) stored as parquet;\n\n-- \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130 \uc0dd\uc131\ninsert into default.schema_evolution_test (\n    c1, c2\n) values (&#039;1&#039;,1);\ninsert into default.schema_evolution_test_tmp (\n    c1, c2\n) values (&#039;2&#039;,2);\n\n-- \uc784\uc2dc \ud14c\uc774\ube14\uc758 Parquet \ud30c\uc77c \uacbd\ub85c \uc870\ud68c\ndescribe formatted default.schema_evolution_test_tmp;\n--Location: hdfs:\/\/quickstart.cloudera:8020\/user\/hive\/warehouse\/schema_evolution_test_tmp\n\n-- \uceec\ub7fc \uc21c\uc11c\uac00 \ub2e4\ub978 \ub450 Parquet \ud30c\uc77c\uc744 \ud558\ub098\uc758 \ud14c\uc774\ube14\uc5d0 \ub85c\ub4dc\nload data\n     inpath &#039;hdfs:\/\/quickstart.cloudera:8020\/user\/hive\/warehouse\/schema_evolution_test_tmp&#039;\ninto table\n     default.schema_evolution_test;\n\n-- PARQUET_FALLBACK_SCHEMA_RESOLUTION=position \uc778 \uc0c1\ud0dc\uc5d0\uc11c \uc870\ud68c\uacb0\uacfc \uc5d0\ub7ec \ubc1c\uc0dd\nselect * from default.schema_evolution_test;\n--File &#039;hdfs:\/\/quickstart.cloudera:8020\/user\/hive\/warehouse\/schema_evolution_test\/9246f35fb0c26132-8930434600000000_1994605024_data.0.parq&#039; has an incompatible Parquet schema for column &#039;default.schema_evolution_test.c1&#039;. 
Column type: STRING, Parquet schema: optional int32 c2 [i:0 d:1 r:0]\n\n-- \ud14c\uc2a4\ud2b8 \uc138\uc158\uc5d0 \ub300\ud574 PARQUET_FALLBACK_SCHEMA_RESOLUTION=name \ubcc0\uacbd\nset PARQUET_FALLBACK_SCHEMA_RESOLUTION=name;\n\n-- \uc815\uc0c1 \uc870\ud68c\nselect * from default.schema_evolution_test;<\/code><\/pre>\n<h3>Spark, read parquet \ud14c\uc2a4\ud2b8<\/h3>\n<p>\uc704 \ud14c\uc2a4\ud2b8\uc5d0\uc11c \uc0dd\uc131\ub41c \ub450 Parquet \ud30c\uc77c\uc744 \ub85c\uceec\uc5d0 \ub0b4\ub824\ubc1b\uc544, \ub85c\uceec <code>spark-shell<\/code> \uc5d0\uc11c \ud14c\uc2a4\ud2b8\ud55c \uacb0\uacfc, Spark \uc5d0\uc11c\ub294 \uceec\ub7fc \uc624\ub354\uac00 \ub2e4\ub978 \ud30c\uc77c\ub4e4\uc744 \uc77d\ub354\ub77c\ub3c4 key \uac12\uc5d0 \ub9de\ucd94\uc5b4 \uc2a4\ud0a4\ub9c8 \ub9e4\ud551\uc744 \ud558\ub294 \uac83\uc744 \ud655\uc778<\/p>\n<pre><code class=\"language-scala\">scala&gt; val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n\nscala&gt; val myParq = sqlContext.read.parquet(&quot;*.parq&quot;)\n\nscala&gt; myParq.show()\n+----+---+\n|  c1| c2|\n+----+---+\n|[31]|  1|\n..\n.\n\nscala&gt; myParq.printSchema()\nroot\n |-- c1: binary (nullable = true)\n |-- c2: integer (nullable = true)\n\nscala&gt; myParq.registerTempTable(&quot;tab&quot;)\nscala&gt; val result = sqlContext.sql(&quot;select * from tab&quot;)\nscala&gt; result.show()\n+----+---+\n|  c1| c2|\n+----+---+\n|[31]|  1|\n..\n.<\/code><\/pre>\n<h2>\ucc38\uace0 \ub9c1\ud06c<\/h2>\n<h3>Parquet Schema Evolution<\/h3>\n<ul>\n<li><a href=\"https:\/\/docs.cloudera.com\/documentation\/enterprise\/5-9-x\/topics\/impala_parquet.html#parquet_schema_evolution\">https:\/\/docs.cloudera.com\/documentation\/enterprise\/5-9-x\/topics\/impala_parquet.html#parquet_schema_evolution<\/a><\/li>\n<\/ul>\n<h3>Cloudera \ub9c8\uc2a4\ud130\uc758 \ucf54\uba58\ud2b8<\/h3>\n<ul>\n<li>\n<p><a 
href=\"https:\/\/community.cloudera.com\/t5\/Support-Questions\/PARQUET-FALLBACK-SCHEMA-RESOLUTION\/m-p\/62321\/highlight\/true#M13946\">https:\/\/community.cloudera.com\/t5\/Support-Questions\/PARQUET-FALLBACK-SCHEMA-RESOLUTION\/m-p\/62321\/highlight\/true#M13946<\/a><\/p>\n<\/li>\n<li>\n<p><code>PARQUET_FALLBACK_SCHEMA_RESOLUTION<\/code> \uac12\uc740 position \ubcf4\ub2e4\ub294 name \uc744 \uae30\ubcf8\uc73c\ub85c \uc124\uc815\ud558\ub294 \uac83\uc774 \ubcf4\ub2e4 \uc9c1\uad00\uc801\uc774\uba70, \uc131\ub2a5 \ucc28\uc774\ub294 \ud06c\uac8c \uc5c6\uc74c.<\/p>\n<\/li>\n<li>\n<p>position, name \uc911 \uc5b4\ub290 \uac83\uc744 \uace0\ub974\ub294\uc9c0\uc5d0 \ub530\ub77c tradeoff \uac00 \uc874\uc7ac<\/p>\n<ul>\n<li><code>position<\/code> \uc73c\ub85c \ud558\ub294 \uacbd\uc6b0, \uceec\ub7fc \ucd94\uac00\ub97c \ub9e8 \ub4a4\uc5d0 \ud558\ub294 \uac83\ub9cc \uac00\ub2a5\ud558\uc9c0\ub9cc \uceec\ub7fc\uba85 renaming \uc5d0 \uc790\uc720\ub85c\uc6c0<\/li>\n<li><code>name<\/code> \uc73c\ub85c \ud558\ub294 \uacbd\uc6b0 \uceec\ub7fc \uc704\uce58\uc5d0 \uc0c1\uad00\uc5c6\uc774 add\/drop \uc774 \uac00\ub2a5<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>\ud558\ub098\uc758 Impala \ud14c\uc774\ube14\uc740 \uc5ec\ub7ec\uac1c\uc758 Parquet \ud30c\uc77c\ub85c \uad6c\uc131\ub420 \uc218 \uc788\ub294\ub370, \uc774\ub54c \uac01 \ud30c\ucf00\uc774 \ud30c\uc77c\ub4e4\uc758 \uceec\ub7fc \uc21c\uc11c\uac00 \ub2e4\ub97c \uc218\uac00 \uc788\ub2e4. 
\uc784\ud314\ub77c\ub9cc\uc744 \uc774\uc6a9\ud574\uc11c \ud30c\ucf00\uc774 \ud30c\uc77c\uc744 \uc0dd\uc131\ud588\ub2e4 \ud558\ub354\ub77c\ub3c4 \uae30\uc874\uc5d0 \uc5c6\ub358 \uceec\ub7fc\uc774 \uc774\ud6c4\uc5d0 rename \ub418\ub294 \uacbd\uc6b0 \ub610\ub294 \ub2e4\ub978 \ubc29\uc2dd\uc73c\ub85c \uc0dd\uc131\ub41c \ud30c\ucf00\uc774 \ud30c\uc77c\uc744 impala \uc5d0\uc11c \ucd94\uac00\ub85c \ub85c\ub4dc\ud558\ub294 \uacbd\uc6b0\uc5d0 \uc774\ub7f0 \uc0c1\ud669\uc774 \ubc1c\uc0dd\ud560 \uc218 \uc788\uc744 \uac83 \uac19\ub2e4. \uc5b4\uca0b\ub4e0 \ub2e4\uc74c\uacfc \uac19\uc740 \uc5d0\ub7ec\uac00 \ubc1c\uc0dd\ud558\ub294 \uacbd\uc6b0 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[161],"tags":[],"class_list":["post-1490","post","type-post","status-publish","format-standard","hentry","category-impala"],"_links":{"self":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/1490","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/comments?post=1490"}],"version-history":[{"count":0,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/1490\/revisions"}],"wp:attachment":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/media?parent=1490"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/categories?post=1490"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/tags?post=1490"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}