728x90
1. 동적 웹페이지 크롤링
selenium 라이브러리와 chromedriver 사용
[실전] : CoffeeBean 가맹점 이름 크롤링해보기
'''
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
import datetime
from selenium import webdriver
import time
def CoffeeBean_store(result, start=1, stop=370):
    """Crawl CoffeeBean store details with Selenium and append them to ``result``.

    For every store number in ``range(start, stop)`` the store-locator page is
    loaded, the page's ``storePop2(<number>)`` script is executed, and the
    resulting HTML is parsed for the store name, address and phone number.

    Args:
        result: List that receives one ``[name, address, phone]`` row per
            store that could be parsed. It is mutated in place.
        start: First store number to request (inclusive). Defaults to 1.
        stop: Store number at which to stop (exclusive). Defaults to 370.

    Returns:
        None. Rows are delivered through ``result``.
    """
    # Imported here so this function does not depend on edits to the file's
    # import header.
    from selenium.common.exceptions import WebDriverException

    CoffeeBean_URL = "https://www.coffeebeankorea.com/store/store.asp"
    # The path below is a placeholder: replace it with the real location of
    # the chromedriver binary.
    # NOTE(review): newer Selenium releases may expect a Service object
    # instead of executable_path -- confirm against the installed version.
    wd = webdriver.Chrome(executable_path='chromedriver가 있는 경로')
    try:
        for i in range(start, stop):
            wd.get(CoffeeBean_URL)  # load the web page
            time.sleep(1)  # give the page time to finish loading
            try:
                wd.execute_script("storePop2(%d)" % i)  # run the page script
                time.sleep(1)  # give the script time to update the DOM
                soupCB = BeautifulSoup(wd.page_source, 'html.parser')
                store_name_h2 = soupCB.select("div.store_txt > h2")
                store_name = store_name_h2[0].string
                print(store_name)
                store_info = soupCB.select(
                    "div.store_txt > table.store_table > tbody > tr > td"
                )
                # Only the first child node of the address cell is kept.
                store_address = store_info[2].contents[0]
                store_phone = store_info[3].string
            except (IndexError, WebDriverException):
                # Best effort: skip store numbers whose details are missing
                # from the page or whose script call fails.
                continue
            result.append([store_name, store_address, store_phone])
    finally:
        # Always shut the browser down, even if the crawl is interrupted.
        wd.quit()
    return
def main():
    """Run the CoffeeBean crawl and save the collected rows to a CSV file.

    The rows gathered by ``CoffeeBean_store`` are loaded into a DataFrame with
    the columns ``store``, ``address`` and ``phone`` and written to
    ``CoffeeBean.csv`` using the cp949 encoding, index column included.
    """
    print('CoffeeBean store crawling >>>>>>>>>>>>>>>>>>>>>>>')
    rows = []
    CoffeeBean_store(rows)

    # Build the table from the crawled rows.
    header = ('store', 'address', 'phone')
    frame = pd.DataFrame(rows, columns=header)

    # Persist the table as CSV.
    frame.to_csv('CoffeeBean.csv', encoding='cp949', mode='w', index=True)


if __name__ == '__main__':
    main()
'''
[결과]
2. URL 직접 호출로 CoffeeBean 웹페이지의 가맹점 이름 크롤링하기
'''
import urllib.request
import pandas as pd
import datetime
import hjson
def CoffeeBean_store(result, start=1, stop=370):
    """Fetch CoffeeBean store details from the store-data URL into ``result``.

    For every store number in ``range(start, stop)`` the ``store_data2.asp``
    endpoint is requested directly, the response body is parsed with
    ``hjson``, and the ``StoreName``, ``StoreAddress`` and ``StoreTel`` fields
    of the first element are collected.

    Args:
        result: List that receives one ``[name, address, phone]`` row per
            store that could be fetched and parsed. It is mutated in place.
        start: First store number to request (inclusive). Defaults to 1.
        stop: Store number at which to stop (exclusive). Defaults to 370.

    Returns:
        None. Rows are delivered through ``result``.
    """
    for i in range(start, stop):
        api_url = "http://www.coffeebeankorea.com/store/store_data2.asp?storeNo=%d" % i
        try:
            # Call the URL directly and read the body; the context manager
            # closes the connection once the payload has been read.
            with urllib.request.urlopen(api_url) as response:
                charset = response.headers.get_content_charset() or 'utf-8'
                jtxt = response.read().decode(charset)
            jobj = hjson.loads(jtxt)
            store_name = jobj[0]['StoreName']
            store_address = jobj[0]['StoreAddress']
            store_phone = jobj[0]['StoreTel']
        except (urllib.error.URLError, ValueError, LookupError, TypeError):
            # Best effort: skip store numbers that cannot be fetched, whose
            # payload cannot be decoded/parsed, or that lack the expected
            # fields, so one bad entry does not abort the whole crawl.
            continue
        print(store_name)
        result.append([store_name, store_address, store_phone])
    return
def main():
    """Run the CoffeeBean crawl and save the collected rows to a CSV file.

    The rows gathered by ``CoffeeBean_store`` are loaded into a DataFrame with
    the columns ``store``, ``address`` and ``phone`` and written to
    ``CoffeeBean.csv`` using the cp949 encoding, index column included.
    """
    print('CoffeeBean store crawling >>>>>>>>>>>>>>>>>>>>>>>>>>>')
    rows = []
    CoffeeBean_store(rows)

    # Build the table from the crawled rows.
    header = ('store', 'address', 'phone')
    frame = pd.DataFrame(rows, columns=header)

    # Persist the table as CSV.
    frame.to_csv('CoffeeBean.csv', encoding='cp949', mode='w', index=True)


if __name__ == '__main__':
    main()
'''
728x90
'전공 > Data Analysis' 카테고리의 다른 글
[이론] 키와 무결성 제약조건 (0) | 2022.03.11 |
---|---|
[빅데이터 분석] 크롤링 - (2) (0) | 2021.04.12 |
[빅데이터 분석] 크롤링 - (1) (0) | 2021.04.10 |