{"id":602,"date":"2018-12-14T13:20:09","date_gmt":"2018-12-14T04:20:09","guid":{"rendered":"https:\/\/dong1lkim.oboki.net\/?p=602"},"modified":"2019-09-01T22:20:24","modified_gmt":"2019-09-01T13:20:24","slug":"python-requests-%eb%a5%bc-%ec%9d%b4%ec%9a%a9%ed%95%9c-crawler-%eb%a1%9c%eb%98%90-%ed%9a%8c%ec%b0%a8%eb%b3%84-%eb%8b%b9%ec%b2%a8-%ea%b2%b0%ea%b3%bc","status":"publish","type":"post","link":"https:\/\/oboki.net\/workspace\/python\/python-requests-%eb%a5%bc-%ec%9d%b4%ec%9a%a9%ed%95%9c-crawler-%eb%a1%9c%eb%98%90-%ed%9a%8c%ec%b0%a8%eb%b3%84-%eb%8b%b9%ec%b2%a8-%ea%b2%b0%ea%b3%bc\/","title":{"rendered":"[Python] requests\ub97c \uc774\uc6a9\ud55c crawler &#8211; \ub85c\ub610 \ud68c\ucc28\ubcc4 \ub2f9\ucca8 \uacb0\uacfc"},"content":{"rendered":"<h1>requests\ub97c \uc774\uc6a9\ud55c crawler &#8211; \ub85c\ub610 \ud68c\ucc28\ubcc4 \ub2f9\ucca8 \uacb0\uacfc<\/h1>\n<blockquote><p>\n  Python \uc744 \uc774\uc6a9\ud574\uc11c \ub85c\ub610 \ub2f9\ucca8 \uacb0\uacfc\ub97c \uc218\uc9d1\ud558\ub294 Crawler\ub97c \ub9cc\ub4e0\ub2e4.\n<\/p><\/blockquote>\n<h2>\ubaa9\ud45c<\/h2>\n<p><a href=\"https:\/\/www.dhlottery.co.kr\/gameResult.do?method=byWin&amp;drwNo=1\">https:\/\/www.dhlottery.co.kr\/gameResult.do?method=byWin&amp;drwNo=1<\/a> \ud398\uc774\uc9c0\uc5d0 \ub4e4\uc5b4\uac00\uba74 \uc544\ub798\uc640 \uac19\uc774 \ud68c\ucc28\ubcc4 \ub2f9\ucca8 \uc815\ubcf4\ub97c \uc870\ud68c\ud560 \uc218 \uc788\ub294\ub370, \uac01 \ud68c\ucc28\ubcc4 \ud398\uc774\uc9c0\uc5d0\uc11c \uc544\ub798 \ud45c\uc2dc\ub41c \ub2f9\ucca8 \ubc88\ud638, \ubcf4\ub108\uc2a4 \ubc88\ud638, \ub4f1\uc704\ubcc4 \ub2f9\ucca8\uae08\uc561, \ub4f1\uc704\ubcc4 \ub2f9\ucca8 \uac8c\uc784 \uc218 \ub370\uc774\ud130\ub97c \uc218\uc9d1\ud55c\ub2e4.<\/p>\n<p><img decoding=\"async\" src=\"\/workspace\/wp-content\/uploads\/2018\/12\/lotto_result_page.png\" alt=\"\" \/><\/p>\n<h2>\ub9cc\ub4e4\uae30<\/h2>\n<h3>requests<\/h3>\n<p><code>pip 
install requests<\/code> \uba85\ub839\uc73c\ub85c <code>requests<\/code> \ud328\ud0a4\uc9c0\ub97c \uc124\uce58\ud55c\ub2e4.<\/p>\n<p>\uc544\ub798\uc640 \uac19\uc774 \ud2b9\uc815 url\uc5d0 get \uc694\uccad\uc744 \ubcf4\ub0b4\uace0 text \uc18d\uc131\uc744 \uc870\ud68c\ud558\uba74<\/p>\n<pre><code class=\"py\">import requests\n\nurl = 'https:\/\/www.dhlottery.co.kr\/gameResult.do?method=byWin&amp;drwNo=1'\nhtml = requests.get(url).text\nprint(html)\n<\/code><\/pre>\n<p>\ub2e4\uc74c\uacfc \uac19\uc774 html \ubb38\uc11c\uac00 \uc804\ubd80 \ubc18\ud658\ub418\ub294 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"txt\">[python@node2 crawl]$ python test.py  | more\n&lt;!DOCTYPE html&gt;\n&lt;html lang=\"ko\"&gt;\n&lt;head&gt;\n&lt;meta charset=\"EUC-KR\"&gt;\n&lt;meta id=\"utitle\" name=\"title\" content=\"\ub3d9\ud589\ubcf5\uad8c\"&gt;\n&lt;meta id=\"desc\" name=\"description\" content=\"\ub3d9\ud589\ubcf5\uad8c 1\ud68c \ub2f9\ucca8\ubc88\ud638 10,23,29,33,37,40+16. 
1\ub4f1 \ucd1d 0\uba85, 1\uc778\ub2f9 \ub2f9\ucca8\uae08\uc561 0\uc6d0.\"&gt;\n&lt;title&gt;\ub85c\ub6106\/45 - \ud68c\ucc28\ubcc4 \ub2f9\ucca8\ubc88\ud638&lt;\/title&gt;\n&lt;title&gt;\ub3d9\ud589\ubcf5\uad8c&lt;\/title&gt;\n\n&lt;meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\"&gt;\n&lt;link rel=\"shortcut icon\" href=\"\/images\/common\/favicon.ico\" type=\"image\/x-icon\"&gt;\n&lt;link rel=\"icon\" href=\"\/images\/common\/favicon.ico\" type=\"image\/x-icon\"&gt;\n&lt;script type=\"text\/javascript\" src=\"\/js\/jquery-1.9.1.min.js\"&gt;&lt;\/script&gt;\n&lt;script type=\"text\/javascript\" src=\"\/js\/common.js\" charset=\"utf-8\"&gt;&lt;\/script&gt;\n&lt;script type=\"text\/javascript\"&gt;\n\nfn_g_init_message(\"\");\n\nvar gameUserId = \"\";\n\nfunction goGame(){\n    var userId = \"\";\n\n    if(userId == '' || userId == null){\n        alert(\"\ub85c\uadf8\uc778 \ud6c4 \uc0ac\uc6a9 \ud574\uc8fc\uc2dc\uae30 \ubc14\ub78d\ub2c8\ub2e4.\");\n        location.href = \"\/user.do?method=login\";\n        return;\n    }\n\n    $.ajax({\n        type:\"get\",                             \/\/ \uba54\uc18c\ub4dc\ud0c0\uc785\n        url:\"https:\/\/el.dhlottery.co.kr\/portal_login.jsp\",  \/\/  url\n        dataType:\"jsonp\",                       \/\/\uc774\ubd80\ubd84\uc774 \uc911\uc694 \ub370\uc774\ud0c0\ud0c0\uc785\uc744 jsonP\ub85c \ud574\uc918\uc57c \ud06c\ub85c\uc2a4\ub3c4\uba54\uc778\uc744 \uc774\uc6a9\ud560\uc218 \uc788\ub2e4.\n        jsonp : 'callback',                     \/\/ \ucf5c\ubc31\ud568\uc218 \uc774\ub984 \uba85\uc774\ub2e4. 
\n        timeout:3000,\n        error: function() {                     \/\/ \uc5d0\ub7ec\ub0a0\uacbd\uc6b0 \ucf5c\ubc31\ud568\uc218\n            alert('\uc811\uc18d\uc774 \uc6d0\ud560\ud558\uc9c0 \uc54a\uc2b5\ub2c8\ub2e4.'); \n        }, \n        success: function(data){                \/\/ \uc131\uacf5\ud588\uc744\ub54c \ucf5c\ubc31\ud568\uc218\n\n            if(userId == data.userId &amp;&amp; data.userId != \"\"){\n                doGamePopUp(\"\");\n            }else{\n                alert(\"\ub85c\uadf8\uc778 \uc138\uc158\uc774 \ud574\uc81c \ub418\uc5c8\uc2b5\ub2c8\ub2e4.\\n\ub2e4\uc2dc \ud55c\ubc88 \ub85c\uadf8\uc778\ud574 \uc8fc\uc2dc\uae30 \ubc14\ub78d\ub2c8\ub2e4.\");\n...\n..\n.\n<\/code><\/pre>\n<h3>bs4<\/h3>\n<p><code>pip install bs4<\/code> \uba85\ub839\uc73c\ub85c <code>BeautifulSoup<\/code> \ud328\ud0a4\uc9c0\ub97c \uc124\uce58\ud55c\ub2e4. <code>BeautifulSoup<\/code>\uc758 <code>html.parser<\/code>\ub97c \uc774\uc6a9\ud558\uba74 html string source\ub97c \ud30c\uc2f1\ud558\uace0 \ud544\uc694\ud55c \uc694\uc18c\ub9cc \uc120\ud0dd\ud558\uc5ec \ud65c\uc6a9\ud560 \uc218 \uc788\ub2e4.<\/p>\n<p>\ud68c\ucc28\ubcc4 \ub2f9\ucca8\ubc88\ud638 \ud398\uc774\uc9c0\uc5d0\uc11c \ubcf4\ub108\uc2a4 \ubc88\ud638\uc758 \uac12\uc744 \ucd94\ucd9c\ud558\uace0\uc790 \ud558\ub294 \uacbd\uc6b0 \ub2e4\uc74c\uacfc \uac19\uc740 \uc808\ucc28\ub97c \uc218\ud589\ud55c\ub2e4.<\/p>\n<h4>1. \ud398\uc774\uc9c0 \uc694\uc18c \uac80\uc0ac<\/h4>\n<p>\uc6d0\ud558\ub294 \uc624\ube0c\uc81d\ud2b8\uc5d0 \ub9c8\uc6b0\uc2a4\ub97c \uc704\uce58\uc2dc\ud0a4\uace0 <code>\uac80\uc0ac<\/code>\ub97c \uc218\ud589\ud55c\ub2e4.<\/p>\n<p><img decoding=\"async\" src=\"\/workspace\/wp-content\/uploads\/2018\/12\/page_inspection-e1545241377770.png\" alt=\"\" \/><\/p>\n<p>\uc704\uc640 \uac19\uc774 \ud06c\ub86c \uac1c\ubc1c\uc790 \ub3c4\uad6c\uc640 \ud568\uaed8 \ud574\ub2f9 \uc624\ube0c\uc81d\ud2b8\uc758 \uc18c\uc2a4 \uc704\uce58\uac00 \ud558\uc774\ub77c\uc774\ud2b8 \ub41c\ub2e4.<\/p>\n<h4>2. 
selector \ubcf5\uc0ac<\/h4>\n<p>\ud558\uc774\ub77c\uc774\ud2b8 \ub41c \uc624\ube0c\uc81d\ud2b8 \uc18c\uc2a4\ub97c \ub2e4\uc2dc \uc6b0\ud074\ub9ad\ud574 Copy &gt; Copy selector \uba85\ub839\uc744 \uc218\ud589\ud55c\ub2e4.<\/p>\n<p><img decoding=\"async\" src=\"\/workspace\/wp-content\/uploads\/2018\/12\/page_copy_selector-e1545241396787.png\" alt=\"\" \/><\/p>\n<p>\ud14d\uc2a4\ud2b8 \uc5d0\ub514\ud130\uc5d0 \ubd99\uc5ec\ub123\uae30\ub97c \ud574\ubcf4\uba74 <code>#article &gt; div:nth-child(2) &gt; div &gt; div.win_result &gt; div &gt; div.num.bonus &gt; p &gt; span<\/code> \uc640 \uac19\uc774 selector\uac00 \ud074\ub9bd\ubcf4\ub4dc\uc5d0 \ubcf5\uc0ac\ub3fc \uc788\ub294 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.<\/p>\n<h4>3. \ubcf4\ub108\uc2a4 \ubc88\ud638 \ucd94\ucd9c<\/h4>\n<p>\uc55e\uc11c \uc0ac\uc6a9\ud55c requests \ucf54\ub4dc\uc5d0 \uc774\uc5b4\uc11c \ub2e4\uc74c\uacfc \uac19\uc740 \ud504\ub85c\uadf8\ub7a8\uc73c\ub85c \ubcf4\ub108\uc2a4 \ubc88\ud638\ub97c \ucd94\ucd9c\ud560 \uc218 \uc788\ub2e4. 
\ubcf5\uc0ac\ud55c selector \uc5d0\uc11c <code>nth-child(n)<\/code>\ub294 <code>nth-of-type(n)<\/code> \uc73c\ub85c \ubcc0\ud615\ud558\uc5ec \uc0ac\uc6a9\ud55c\ub2e4.<\/p>\n<pre><code class=\"py\">import requests\nurl = 'https:\/\/www.dhlottery.co.kr\/gameResult.do?method=byWin&amp;drwNo=1'\nhtml = requests.get(url).text\n\nfrom bs4 import BeautifulSoup\nsoup = BeautifulSoup(html,'html.parser')\n\nselector = '#article &gt; div:nth-of-type(2) &gt; div &gt; div.win_result &gt; div &gt; div.num.bonus &gt; p &gt; span'\n\nprint(soup.select(selector))\n\nbonus = int(soup.select(selector)[0].text.strip())\nprint(bonus)\n<\/code><\/pre>\n<p>\ud504\ub85c\uadf8\ub7a8 \ucd9c\ub825\uc744 \ubcf4\uba74 select \uc218\ud589 \uacb0\uacfc \ud574\ub2f9 \uc624\ube0c\uc81d\ud2b8\uc758 \ucf54\ub4dc\uac00 list \uc790\ub8cc\ud615\uc73c\ub85c \ubc18\ud658\ub418\uace0\uc788\uae30 \ub54c\ubb38\uc5d0 \uc801\uc808\ud558\uac8c \ubcc0\ud615\ud558\uc5ec \ud65c\uc6a9\ud558\uba74 \ub41c\ub2e4.<\/p>\n<pre><code class=\"txt\">[&lt;span class=\"ball_645 lrg ball2\"&gt;16&lt;\/span&gt;]\n16\n<\/code><\/pre>\n<h2>Sample<\/h2>\n<p>\ub85c\ub610 \ub2f9\ucca8\ubc88\ud638 \uc870\ud68c \ud398\uc774\uc9c0\ub294 url \uc5d0\uc11c \ud68c\ucc28\ub97c argument \ub85c \uc785\ub825\ubc1b\uc544 \uc811\uadfc\ud560 \uc218 \uc788\uc73c\ubbc0\ub85c url\ub9cc \ubc14\uafd4\uac00\uba70 1\ud68c\ucc28\ubd80\ud130 836\ud68c\ucc28\uae4c\uc9c0 \ub370\uc774\ud130\ub97c \ubc18\ubcf5\ud558\uc5ec \uc218\uc9d1\ud560 \uc218 \uc788\ub2e4.<br \/>\n\uc544\ub798 \ucf54\ub4dc\ub294 \ubaa8\ub4e0 \ud68c\ucc28\uc758 \ub2f9\ucca8 \uacb0\uacfc \ud398\uc774\uc9c0\ub97c \ubc29\ubb38\ud558\uc5ec \ub2f9\ucca8 \ubc88\ud638, \ubcf4\ub108\uc2a4 \ubc88\ud638, \ub4f1\uc704\ubcc4 \ub2f9\ucca8\uae08\uc561, \ub4f1\uc704\ubcc4 \ub2f9\ucca8 \uac8c\uc784\uc218\ub97c dict \uc790\ub8cc\ud615\uc73c\ub85c \uc800\uc7a5\ud55c\ub2e4.<br \/>\n\uc5b4\ub5bb\uac8c \uc774 \ub370\uc774\ud130\ub97c \uc368\uc57c\ud560 \uc9c0 \uc544\uc9c1 \ubbf8\uc815\uc778 
\uad00\uacc4\ub85c <code>pickle<\/code>\uc744 \uc774\uc6a9\ud574 list\ub85c \ubb36\uc5b4 \uc77c\ub2e8 \uc800\uc7a5\ud574\ub193\ub294\ub2e4.<\/p>\n<pre><code class=\"py\">#!\/app\/python\/bin\/python\n# crawl_lotto.py\n# dong1lkim\n# 20181214\n\ndef ordinal(n):\n    if   n == 1 : return '1st'\n    elif n == 2 : return '2nd'\n    elif n == 3 : return '3rd'\n    else : return str(n)+'th'\n\nimport requests\nimport re\nfrom bs4 import BeautifulSoup\nimport json\n\nresult = [{\"round\": None,\"win result\": None}]\n\nfor i in range(1,837):\n    win_result = {\"round\":i}\n    url = \"https:\/\/www.dhlottery.co.kr\/gameResult.do?method=byWin&amp;drwNo=\"+str(i)\n    html = requests.get(url).text\n    soup = BeautifulSoup(html,'html.parser')\n\n    win_nums = []\n    for j in range(1,7):\n        selector = '#article &gt; div:nth-of-type(2) &gt; div &gt; div.win_result &gt; div &gt; div.num.win &gt; p &gt; span:nth-of-type('+str(j)+')'\n        win_nums.append(int(soup.select(selector)[0].text.strip()))\n\n    win_result.update({\"win result\":{\"numbers\":win_nums}})\n\n    selector = '#article &gt; div:nth-of-type(2) &gt; div &gt; div.win_result &gt; div &gt; div.num.bonus &gt; p &gt; span'\n    bonus = int(soup.select(selector)[0].text.strip())\n\n    win_result.update({\"win result\":{\"bonus\":bonus}})\n\n    win = {}\n    for j in range(1,6):\n        selector = '#article &gt; div:nth-of-type(2) &gt; div &gt; table &gt; tbody &gt; tr:nth-of-type('+str(j)+') &gt; td:nth-of-type(2) &gt; strong'\n        total = int(re.sub(r\",|\uc6d0\",\"\",soup.select(selector)[0].text.strip()))\n        selector = '#article &gt; div:nth-of-type(2) &gt; div &gt; table &gt; tbody &gt; tr:nth-of-type('+str(j)+') &gt; td:nth-of-type(3)'\n        numofwin = int(re.sub(r\",\",\"\",soup.select(selector)[0].text.strip()))\n        win.update({ordinal(j):{\"total amount\":total,\"number of winners\":numofwin}})\n\n    win_result.update({\"win 
result\":{\"numbers\":win_nums,\"bonus\":bonus,\"win\":win}})\n\n    print(json.dumps(win_result,indent=4))\n    result.append(win_result)\n\nimport pickle\nwith open('lotto.bin','wb') as f:\n    pickle.dump(result,f)\n    f.close()\n\n#with open('lotto.bin','rb') as f:\n#  data = pickle.load(f)\n#  f.close()\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>requests\ub97c \uc774\uc6a9\ud55c crawler &#8211; \ub85c\ub610 \ud68c\ucc28\ubcc4 \ub2f9\ucca8 \uacb0\uacfc Python \uc744 \uc774\uc6a9\ud574\uc11c \ub85c\ub610 \ub2f9\ucca8 \uacb0\uacfc\ub97c \uc218\uc9d1\ud558\ub294 Crawler\ub97c \ub9cc\ub4e0\ub2e4. \ubaa9\ud45c https:\/\/www.dhlottery.co.kr\/gameResult.do?method=byWin&amp;drwNo=1 \ud398\uc774\uc9c0\uc5d0 \ub4e4\uc5b4\uac00\uba74 \uc544\ub798\uc640 \uac19\uc774 \ud68c\ucc28\ubcc4 \ub2f9\ucca8 \uc815\ubcf4\ub97c \uc870\ud68c\ud560 \uc218 \uc788\ub294\ub370, \uac01 \ud68c\ucc28\ubcc4 \ud398\uc774\uc9c0\uc5d0\uc11c \uc544\ub798 \ud45c\uc2dc\ub41c \ub2f9\ucca8 \ubc88\ud638, \ubcf4\ub108\uc2a4 \ubc88\ud638, \ub4f1\uc704\ubcc4 \ub2f9\ucca8\uae08\uc561, \ub4f1\uc704\ubcc4 \ub2f9\ucca8 \uac8c\uc784 \uc218 \ub370\uc774\ud130\ub97c \uc218\uc9d1\ud55c\ub2e4. \ub9cc\ub4e4\uae30 requests pip install requests \uba85\ub839\uc73c\ub85c requests \ud328\ud0a4\uc9c0\ub97c \uc124\uce58\ud55c\ub2e4. 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[10],"tags":[126,34,127],"class_list":["post-602","post","type-post","status-publish","format-standard","hentry","category-python","tag-crawl","tag-python","tag-requests"],"_links":{"self":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/602","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/comments?post=602"}],"version-history":[{"count":8,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/602\/revisions"}],"predecessor-version":[{"id":1193,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/posts\/602\/revisions\/1193"}],"wp:attachment":[{"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/media?parent=602"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/categories?post=602"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/oboki.net\/workspace\/wp-json\/wp\/v2\/tags?post=602"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}