{"id":791,"date":"2018-12-15T11:11:20","date_gmt":"2018-12-15T02:11:20","guid":{"rendered":"https:\/\/dong1lkim.oboki.net\/?p=791"},"modified":"2019-09-01T22:20:24","modified_gmt":"2019-09-01T13:20:24","slug":"python-selenium%ec%9d%84-%ec%9d%b4%ec%9a%a9%ed%95%9c-crawler-%eb%a1%9c%eb%98%90-%eb%8b%b9%ec%b2%a8-%ed%8c%90%eb%a7%a4%ec%a0%90-%ec%a0%95%eb%b3%b4","status":"publish","type":"post","link":"https:\/\/oboki.net\/workspace\/python\/python-selenium%ec%9d%84-%ec%9d%b4%ec%9a%a9%ed%95%9c-crawler-%eb%a1%9c%eb%98%90-%eb%8b%b9%ec%b2%a8-%ed%8c%90%eb%a7%a4%ec%a0%90-%ec%a0%95%eb%b3%b4\/","title":{"rendered":"[Python] selenium\uc744 \uc774\uc6a9\ud55c crawler &#8211; \ub85c\ub610 \ub2f9\ucca8 \ud310\ub9e4\uc810 \uc815\ubcf4"},"content":{"rendered":"<h1>selenium\uc744 \uc774\uc6a9\ud55c crawler &#8211; \ub85c\ub610 \ub2f9\ucca8 \ud310\ub9e4\uc810 \uc815\ubcf4<\/h1>\n<blockquote><p>\n  Selenium Web Driver\ub97c \uc774\uc6a9\ud574\uc11c, Session\uc744 \uc720\uc9c0\ud55c \uc0c1\ud0dc\uc5d0\uc11c \uc815\ubcf4\ub97c \uc218\uc9d1\ud558\ub294 Crawler \ub97c \ub9cc\ub4e0\ub2e4.\n<\/p><\/blockquote>\n<p><a href=\"https:\/\/www.dhlottery.co.kr\/store.do?method=topStore&amp;pageGubun=L645]\"><a href=\"https:\/\/www.dhlottery.co.kr\/store.do?method=topStore&amp;amp;pageGubun=L645\">https:\/\/www.dhlottery.co.kr\/store.do?method=topStore&amp;pageGubun=L645<\/a><\/a> \ud398\uc774\uc9c0\uc5d0 \ub4e4\uc5b4\uac00\uba74 \ud68c\ucc28\ubcc4 1\ub4f1 \ubc30\ucd9c\uc810\uc744 \uc870\ud68c\ud560 \uc218 \uc788\ub2e4. \ud68c\ucc28\ub97c \uc120\ud0dd\ud558\uae30 \uc704\ud574\uc11c\ub294 Select Box\uc5d0\uc11c \ud655\uc778\ud558\uace0\uc790 \ud558\ub294 \ud68c\ucc28\ub97c \uc120\ud0dd\ud558\uba74 \ub418\ub294\ub370, url\uc744 \ud1b5\ud574\uc11c \uc811\uadfc\ud560 \uc218\ub294 \uc5c6\ub2e4.<br \/>\n\uc774 \ub54c\ubb38\uc5d0 \ud2b9\uc815 \ud68c\ucc28\uc758 \ub2f9\ucca8 \ud310\ub9e4\uc810\uc744 \uc870\ud68c\ud558\uae30 \uc704\ud574\uc11c\ub294 \ucd5c\ucd08 \ud398\uc774\uc9c0\uc5d0 \uc811\uc18d\ud55c \ub4a4 \ud574\ub2f9 Session\uc744 \uc720\uc9c0\ud55c \ucc44\ub85c \ud68c\ucc28\ub97c \uc120\ud0dd\ud574\ub098\uac00\ub294 \uc218\ubc16\uc5d0 \uc5c6\uace0 Selenium\uc758 Web Driver\ub97c \uc774\uc6a9\ud558\uba74 \ub41c\ub2e4.<\/p>\n<h2>\uc124\uce58<\/h2>\n<h3>Selenium<\/h3>\n<p><code>pip install selenium<\/code> \uba85\ub839\uc73c\ub85c selenium \ud328\ud0a4\uc9c0\ub97c \uc124\uce58\ud55c\ub2e4.<\/p>\n<h3>Web Driver<\/h3>\n<p>WebDriver\ub97c \uc81c\uacf5\ud558\ub294 \ube0c\ub77c\uc6b0\uc800 \uc0ac\uc774\ud2b8\uc5d0\uc11c \uc0ac\uc6a9\ud558\uace0\uc790 \ud558\ub294 Web Driver\ub97c \ub2e4\uc6b4\ub85c\ub4dc \ubc1b\ub294\ub2e4.<\/p>\n<ul>\n<li>Chrome\n<ul>\n<li>\ud06c\ub864\ub9c1\ud558\ub294 \uacfc\uc815\uc744 Chrome \ud654\uba74\uc5d0\uc11c \ubcfc \uc218 \uc788\ub2e4. \ub514\ubc84\uae45 \ud558\uae30\uc5d0 \uc88b\uc740\ub370 GUI \ud658\uacbd\uc774 \ud544\uc694\ud558\ub2e4.<\/li>\n<li><a href=\"http:\/\/chromedriver.chromium.org\/downloads\">http:\/\/chromedriver.chromium.org\/downloads<\/a><\/li>\n<\/ul>\n<\/li>\n<li>Phantomjs\n<ul>\n<li>GUI \ub97c \uc0ac\uc6a9\ud558\uc9c0 \uc54a\uace0 \ubc31\uadf8\ub77c\uc6b4\ub4dc\uc5d0\uc11c \uc138\uc158\uc744 \uc0dd\uc131\ud558\uace0 html\uc744 \ubc1b\uc544\uc624\ub294 \ub9cc\ud07c \uac00\ubccd\ub2e4.<\/li>\n<li>Selenium \uc5d0\uc11c\ub294 \ub354\uc774\uc0c1 \uc9c0\uc6d0\ud558\uc9c0 \uc54a\ub294 \uac83 \uac19\ub2e4. <strong>Deprecated<\/strong><\/li>\n<li><a href=\"http:\/\/phantomjs.org\/download.html\">http:\/\/phantomjs.org\/download.html<\/a><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h2>Sample<\/h2>\n<p>\uc544\ub798\uc758 \ud504\ub85c\uadf8\ub7a8\uc740 \ub3d9\ud589\ubcf5\uad8c \ub2f9\ucca8 \ud310\ub9e4\uc810 \ud398\uc774\uc9c0\uc5d0 \uc811\uc18d\ud55c \ub4a4, 262~839 \ud68c\ucc28\ub97c \uc21c\uc11c\ub300\ub85c \uc120\ud0dd\ud558\uc5ec 1\ub4f1 \ubc30\ucd9c\uc810 \uc815\ubcf4\ub97c \uc800\uc7a5\ud55c\ub2e4.<\/p>\n<pre><code class=\"py\">from selenium import webdriver\nfrom bs4 import BeautifulSoup\n\ndriver = webdriver.Chrome('.\/chromedriver.exe')\n#driver = webdriver.PhantomJS('.\/phantomjs-2.1.1-windows\/bin\/phantomjs.exe')\ndriver.implicitly_wait(3)\n\ndriver.get('https:\/\/www.dhlottery.co.kr\/store.do?method=topStore&amp;pageGubun=L645')\nresult = []\n\nfor i in range(262,840):\n    path = '\/\/*[@id=\"drwNo\"]\/option[text()=\"' + str(i) + '\"]'\n    driver.find_element_by_xpath(path).click()\n    driver.find_element_by_xpath('\/\/*[@id=\"searchBtn\"]').click()\n\n    html = driver.page_source\n    soup = BeautifulSoup(html,'html.parser')\n    locs = soup.find('table',{'class':'tbl_data tbl_data_col'}).findAll('tr')\n    temp = []\n    if len(locs) &gt; 2 :\n        for j in range(1,len(locs)):\n            loc = locs[j].findAll('td')\n            shop = loc[1].text.strip()\n            mode = loc[2].text.strip()\n            addr = loc[3].text.strip()\n            temp.append({'shop name':shop,'mode':mode,'location':addr})\n    #print(temp)\n    result.append({'round':i,'shops':temp})\n\nimport json\nprint(json.dumps(result,indent=2))\n\nimport pickle\nwith open('lotto_loc.bin','wb') as f:\n    pickle.dump(result,f)\n    f.close()\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>selenium\uc744 \uc774\uc6a9\ud55c crawler &#8211; \ub85c\ub610 \ub2f9\ucca8 \ud310\ub9e4\uc810 \uc815\ubcf4 Selenium Web Driver\ub97c \uc774\uc6a9\ud574\uc11c, Session\uc744 \uc720\uc9c0\ud55c \uc0c1\ud0dc\uc5d0\uc11c \uc815\ubcf4\ub97c \uc218\uc9d1\ud558\ub294 Crawler \ub97c \ub9cc\ub4e0\ub2e4. https:\/\/www.dhlottery.co.kr\/store.do?method=topStore&amp;pageGubun=L645 \ud398\uc774\uc9c0\uc5d0 \ub4e4\uc5b4\uac00\uba74 \ud68c\ucc28\ubcc4 1\ub4f1 \ubc30\ucd9c\uc810\uc744 \uc870\ud68c\ud560 \uc218 \uc788\ub2e4. \ud68c\ucc28\ub97c \uc120\ud0dd\ud558\uae30 \uc704\ud574\uc11c\ub294 Select Box\uc5d0\uc11c \ud655\uc778\ud558\uace0\uc790 \ud558\ub294 \ud68c\ucc28\ub97c \uc120\ud0dd\ud558\uba74 \ub418\ub294\ub370, url\uc744 \ud1b5\ud574\uc11c \uc811\uadfc\ud560 \uc218\ub294 \uc5c6\ub2e4. \uc774 \ub54c\ubb38\uc5d0 \ud2b9\uc815 \ud68c\ucc28\uc758 \ub2f9\ucca8 \ud310\ub9e4\uc810\uc744 \uc870\ud68c\ud558\uae30 \uc704\ud574\uc11c\ub294 \ucd5c\ucd08 \ud398\uc774\uc9c0\uc5d0 \uc811\uc18d\ud55c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[10],"tags":[126,34,125],"class_list":["post-791","post","type-post","status-publish","format-standard","hentry","category-python","tag-crawl","tag-python","tag-selenium"],"_links":{"self":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/791","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/comments?post=791"}],"version-history":[{"count":4,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/791\/revisions"}],"predecessor-version":[{"id":1192,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/791\/revisions\/1192"}],"wp:attachment":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/media?parent=791"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/categories?post=791"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/tags?post=791"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}