Python/Crawling

[Crawling] 지마켓 best 품목(상품명,가격,원산지)

퓨어맨 2022. 6. 8. 08:54
# Open a Chrome window to drive and load the Gmarket best-sellers page.
# NOTE(review): `wb`, `Keys` and `time` are not imported in the visible part
# of this file; presumably `wb` is selenium's `webdriver` module -- confirm.
url = 'http://corners.gmarket.co.kr/Bestsellers'
driver = wb.Chrome()
driver.get(url)

# Press PAGE_DOWN five times on <body>, pausing briefly after each key press
# (presumably so the lazily loaded product images get a chance to load).
# The locator strategy is passed as the plain string 'css selector' (the value
# of selenium's By.CSS_SELECTOR): the find_element_by_* helpers were removed in
# Selenium 4.3, while find_element(by, value) exists in both 3.x and 4.x.
body = driver.find_element('css selector', 'body')
for _ in range(5):
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(0.1)
    
# Click each product image in turn, then navigate back to the list page.
# find_elements('css selector', ...) replaces find_elements_by_css_selector,
# which was removed in Selenium 4.3; this form works on 3.x and 4.x alike.
img = driver.find_elements('css selector', 'img.lazy')

# An index loop is used on purpose: the elements are looked up again on every
# pass instead of reusing references obtained before the page navigation.
for i in range(len(img)):
    img = driver.find_elements('css selector', 'img.lazy')
    if i >= len(img):
        # Fewer images are present than when the loop started; stop cleanly
        # rather than fail with IndexError.
        break
    img[i].click()
    time.sleep(0.3)

    driver.back()
    time.sleep(0.3)

# Collect the values scraped from each product page into parallel lists.
top_n = 5  # number of products (from the top of the list) to scrape
title_list = []
price_list = []
con_list = []
# Fixed: this list was originally created as `rank_lsit`, so the
# rank_list.append(...) call in the loop raised NameError on the first pass.
rank_list = []

for i in range(top_n):
    # Look the images up again on every pass (the page is reloaded by
    # driver.back()). find_elements('css selector', ...) replaces
    # find_elements_by_css_selector, which was removed in Selenium 4.3.
    img = driver.find_elements('css selector', 'img.lazy')
    img[i].click()
    time.sleep(0.3)

    # (Important) parse the product page that is currently loaded.
    soup = bs(driver.page_source, 'lxml')

    title = soup.select_one('h1.itemtit')
    price = soup.select_one('strong.price_real')
    con = soup.select_one('ul > li.list-item-origin.uxeslide_item > div > div')

    title_list.append(title.text)
    price_list.append(price.text)
    # Keep the second line of the stripped origin text, minus its first three
    # characters (presumably a label prefix -- confirm against the page).
    con_list.append(con.text.strip().split('\n')[1][3:])
    rank_list.append(i + 1)  # 1-based rank in list order

    driver.back()
    time.sleep(0.3)

# Map each output column name to the list holding its scraped values.
dic = {
    '상품명': title_list,
    '가격': price_list,
    '원산지': con_list,
    '순위': rank_list,
}

# Build the DataFrame and use the rank column as its index.
df = pd.DataFrame(data=dic).set_index('순위')
# Bare expression: shows the frame when this is run as a notebook cell.
df

 

결과값 (임의로 5개만 생성)