Baixar postagens do Instagram usando o módulo Python Selenium

Neste artigo, aprenderemos como baixar as postagens de um perfil do Instagram usando o módulo Selenium do Python.

Requisitos:

Google Chrome ou Firefox
Driver do Chrome (para Google Chrome) ou driver Gecko (para Mozilla Firefox)
Pacote Selenium: é uma ferramenta poderosa para controlar um navegador da web por meio de um programa. Funciona com todos os principais navegadores e sistemas operacionais, e seus scripts podem ser escritos em várias linguagens, como Python, Java, C#, etc. Pode ser instalado usando o comando abaixo:

pip install selenium

Pacote Beautiful Soup: é uma biblioteca Python para extrair dados de arquivos HTML e XML. Ela funciona com o seu analisador (parser) favorito para fornecer maneiras idiomáticas de navegar, pesquisar e modificar a árvore de análise. Pode ser instalada usando o comando abaixo:

pip install bs4

Pacote Requests: a biblioteca Requests é amplamente usada em Python para fazer solicitações HTTP a um URL especificado. Pode ser instalada usando o comando abaixo:

pip install requests

Abordagem passo a passo:

Etapa 1: Importar módulos e inserir as informações de login junto com o URL da página.

from selenium import webdriver 
from selenium.webdriver.common.keys import Keys 
import selenium.common.exceptions 
import time 
from bs4 import BeautifulSoup as bs 
import requests 
import os 
# Credentials of the account used to log in, and the profile to download from.
username = input('Enter Your User Name ')
password = input('Enter Your Password ')
# Parentheses carry the expression onto the next line; a backslash followed by
# whitespace is a SyntaxError, so no backslash continuation is used here.
url = 'https://instagram.com/' + input(
    'Enter User Name Of User For Downloading Posts ')

Etapa 2: Função para iniciar uma nova sessão do navegador. Dependendo da sua instalação, pode ser necessário passar o caminho do driver da web para a função Chrome().

def path():
    """Start a new Chrome session and store it in the module-level ``chrome``.

    Every other function in this script reads the ``chrome`` global, so this
    must be called before any of them.
    """
    global chrome
    chrome = webdriver.Chrome()

Etapa 3: Função para inserir o URL da página.

def url_name(url):
    """Open *url* in the shared ``chrome`` browser, then pause for 4 seconds."""
    chrome.get(url)
    # Fixed pause after navigation; presumably gives the page time to finish
    # loading before later steps look up elements on it.
    time.sleep(4)

Etapa 4: Função para inserir suas informações de login.

def login(username, your_password):
    """Log in through the form reachable from the currently open page.

    Args:
        username: Account name typed into the ``username`` field.
        your_password: Password typed into the ``password`` field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If any of the
            expected elements is not present on the page.
    """
    # Function-scope import so this block works on its own. The
    # find_element_by_* helpers were removed in Selenium 4.3; find_element
    # with a By locator is the supported form.
    from selenium.webdriver.common.by import By

    # "L3NKy" is presumably the class of the "Log In" button -- TODO confirm.
    log_but = chrome.find_element(By.CLASS_NAME, "L3NKy")
    time.sleep(2)
    log_but.click()
    time.sleep(4)

    chrome.find_element(By.NAME, "username").send_keys(username)

    passw = chrome.find_element(By.NAME, "password")
    passw.send_keys(your_password)
    # Pressing RETURN in the password field submits the form.
    passw.send_keys(Keys.RETURN)
    time.sleep(5.5)

    # "yWX7d" is presumably the button that dismisses the dialog shown after
    # logging in -- TODO confirm against the live page.
    chrome.find_element(By.CLASS_NAME, "yWX7d").click()
    time.sleep(3)

Etapa 5: Função para abrir a primeira postagem.

def first_post():
    """Click the first element with class ``kIKUG`` and wait 2 seconds.

    The class presumably marks a post thumbnail on the profile grid, so the
    click opens the first post -- TODO confirm against the live page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If no such element
            exists on the page.
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    # click() returns None, so its result is not kept.
    chrome.find_element(By.CLASS_NAME, "kIKUG").click()
    time.sleep(2)

Etapa 6: Função para baixar todas as postagens.

def download_allposts():
    """Download every post of the profile in ``url`` into a folder named after it.

    Opens the first post, then keeps clicking the "next post" arrow until
    ``next_post()`` reports there is none. Post number ``c`` is written to
    ``<user>/content<c>`` when it holds a single item, or to
    ``<user>/content<c>.<n>`` for each item of a multi-item post.
    """
    first_post()

    # Strip trailing slashes so a URL ending in "/" still yields the profile
    # name rather than an empty folder name.
    user_name = url.rstrip('/').split('/')[-1]
    os.makedirs(user_name, exist_ok=True)

    _save_post(user_name, 1)

    c = 2
    while True:
        next_el = next_post()
        if not next_el:
            # next_post() returns 0 when there is no further post.
            break
        next_el.click()
        time.sleep(1.3)
        try:
            _save_post(user_name, c)
        except selenium.common.exceptions.NoSuchElementException:
            print("finished")
            return
        c += 1


def _save_post(user_name, index):
    """Save all media of the currently open post as ``content<index>[.<n>]``."""
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    prefix = user_name + '/' + 'content' + str(index)

    nescheck = nested_check()
    if not nescheck:
        # No "next image" arrow: the post holds a single item.
        save_content('_97aPb', prefix)
        return

    count_img = 0
    while nescheck:
        elem_img = chrome.find_element(By.CLASS_NAME, 'rQDP3')
        save_multiple(prefix + '.' + str(count_img), elem_img)
        count_img += 1
        nescheck.click()
        nescheck = nested_check()
    # After the last click there is no arrow left; the final item is taken
    # from the last list entry of the most recently read container.
    save_multiple(prefix + '.' + str(count_img), elem_img, last_img_flag=1)

Etapa 7: Função para localizar o botão da próxima postagem.

def next_post():
    """Return the "next post" arrow element, or 0 when it is not on the page.

    The element is looked up by the class ``coreSpriteRightPaginationArrow``.
    Callers treat the falsy 0 as "no more posts".
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    try:
        return chrome.find_element(
            By.CLASS_NAME, "coreSpriteRightPaginationArrow")
    except selenium.common.exceptions.NoSuchElementException:
        return 0

Etapa 8: Função para salvar postagens normais.

def save_content(class_name, img_name):
    """Download the video or image inside an element and write it to a file.

    Args:
        class_name: Class of the element whose inner HTML holds the media tag.
        img_name: Path of the file to write.

    If the element is missing, or holds neither a ``<video>`` nor an
    ``<img>`` tag, a message is printed and nothing is written.
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    time.sleep(0.5)

    try:
        pic = chrome.find_element(By.CLASS_NAME, class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        return

    soup = bs(pic.get_attribute('innerHTML'), 'html.parser')
    # A <video> tag takes precedence over an <img> tag.
    media = soup.find('video') or soup.find('img')
    if media is None:
        # Without this guard, indexing None would raise TypeError and abort
        # the whole download run.
        print("No image or video found in this post")
        return

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(media['src'], timeout=30)

    with open(img_name, 'wb') as f:
        f.write(response.content)

    time.sleep(0.9)

Etapa 9: Função para salvar postagens aninhadas.

def save_multiple(img_name, elem, last_img_flag=False):
    """Download one item of a multi-item post and write it to *img_name*.

    Args:
        img_name: Path of the file to write.
        elem: Element whose inner HTML contains a ``<ul>`` of ``<li>`` items.
        last_img_flag: When truthy, take the last ``<li>``; otherwise take
            the one at the middle index of the list.
    """
    time.sleep(1)

    markup = bs(elem.get_attribute('innerHTML'), 'html.parser')
    slides = markup.find_all('ul')[0].find_all('li')

    position = -1 if last_img_flag else len(slides) // 2
    slide = slides[position]

    # A <video> tag takes precedence over an <img> tag.
    clip = slide.find('video')
    source = clip['src'] if clip else slide.find('img')['src']

    payload = requests.get(source)
    with open(img_name, 'wb') as out:
        out.write(payload.content)

Etapa 10: Função para verificar se a postagem está aninhada ou não.

def nested_check():
    """Return the ``coreSpriteRightChevron`` element, or 0 when it is absent.

    Callers click the returned element to advance to the next item of a
    multi-item post, and treat the falsy 0 as "no further item".
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    try:
        time.sleep(1)
        # The locator is given without trailing spaces: whitespace is not
        # part of a class name.
        return chrome.find_element(By.CLASS_NAME, 'coreSpriteRightChevron')
    except selenium.common.exceptions.NoSuchElementException:
        return 0

Etapa 11: Chamar as funções necessárias no código do driver.

# Driver code: start the browser, open the profile, log in and download.
path()
try:
    time.sleep(1)
    url_name(url)
    login(username, password)
    download_allposts()
finally:
    # quit() ends the whole WebDriver session (close() only closes the
    # current window), and the finally clause runs it even if a step fails.
    chrome.quit()

Abaixo está o programa completo com base na abordagem acima:

from selenium import webdriver 
from selenium.webdriver.common.keys import Keys 
import selenium.common.exceptions 
import time 
from bs4 import BeautifulSoup as bs 
import requests 
import os 
  
  
# Credentials of the account used to log in, and the profile to download from.
username = input('Enter Your User Name ')
password = input('Enter Your Password ')
# Parentheses carry the expression onto the next line; a backslash followed by
# whitespace is a SyntaxError, so no backslash continuation is used here.
url = 'https://instagram.com/' + input(
    'Enter User Name Of User For Downloading Posts ')
def path():
    """Start a new Chrome session and store it in the module-level ``chrome``.

    Every other function in this script reads the ``chrome`` global, so this
    must be called before any of them.
    """
    global chrome
    chrome = webdriver.Chrome()
      
def url_name(url):
    """Open *url* in the shared ``chrome`` browser, then pause for 4 seconds."""
    chrome.get(url)
    # Fixed pause after navigation; presumably gives the page time to finish
    # loading before later steps look up elements on it.
    time.sleep(4)
      
def login(username, your_password):
    """Log in through the form reachable from the currently open page.

    Args:
        username: Account name typed into the ``username`` field.
        your_password: Password typed into the ``password`` field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If any of the
            expected elements is not present on the page.
    """
    # Function-scope import so this block works on its own. The
    # find_element_by_* helpers were removed in Selenium 4.3; find_element
    # with a By locator is the supported form.
    from selenium.webdriver.common.by import By

    # "L3NKy" is presumably the class of the "Log In" button -- TODO confirm.
    log_but = chrome.find_element(By.CLASS_NAME, "L3NKy")
    time.sleep(2)
    log_but.click()
    time.sleep(4)

    chrome.find_element(By.NAME, "username").send_keys(username)

    passw = chrome.find_element(By.NAME, "password")
    passw.send_keys(your_password)
    # Pressing RETURN in the password field submits the form.
    passw.send_keys(Keys.RETURN)
    time.sleep(5.5)

    # "yWX7d" is presumably the button that dismisses the dialog shown after
    # logging in -- TODO confirm against the live page.
    chrome.find_element(By.CLASS_NAME, "yWX7d").click()
    time.sleep(3)
      
def first_post():
    """Click the first element with class ``kIKUG`` and wait 2 seconds.

    The class presumably marks a post thumbnail on the profile grid, so the
    click opens the first post -- TODO confirm against the live page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If no such element
            exists on the page.
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    # click() returns None, so its result is not kept.
    chrome.find_element(By.CLASS_NAME, "kIKUG").click()
    time.sleep(2)
      
def next_post():
    """Return the "next post" arrow element, or 0 when it is not on the page.

    The element is looked up by the class ``coreSpriteRightPaginationArrow``.
    Callers treat the falsy 0 as "no more posts".
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    try:
        return chrome.find_element(
            By.CLASS_NAME, "coreSpriteRightPaginationArrow")
    except selenium.common.exceptions.NoSuchElementException:
        return 0
        
def download_allposts():
    """Download every post of the profile in ``url`` into a folder named after it.

    Opens the first post, then keeps clicking the "next post" arrow until
    ``next_post()`` reports there is none. Post number ``c`` is written to
    ``<user>/content<c>`` when it holds a single item, or to
    ``<user>/content<c>.<n>`` for each item of a multi-item post.
    """
    first_post()

    # Strip trailing slashes so a URL ending in "/" still yields the profile
    # name rather than an empty folder name.
    user_name = url.rstrip('/').split('/')[-1]
    os.makedirs(user_name, exist_ok=True)

    _save_post(user_name, 1)

    c = 2
    while True:
        next_el = next_post()
        if not next_el:
            # next_post() returns 0 when there is no further post.
            break
        next_el.click()
        time.sleep(1.3)
        try:
            _save_post(user_name, c)
        except selenium.common.exceptions.NoSuchElementException:
            print("finished")
            return
        c += 1


def _save_post(user_name, index):
    """Save all media of the currently open post as ``content<index>[.<n>]``."""
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    prefix = user_name + '/' + 'content' + str(index)

    nescheck = nested_check()
    if not nescheck:
        # No "next image" arrow: the post holds a single item.
        save_content('_97aPb', prefix)
        return

    count_img = 0
    while nescheck:
        elem_img = chrome.find_element(By.CLASS_NAME, 'rQDP3')
        save_multiple(prefix + '.' + str(count_img), elem_img)
        count_img += 1
        nescheck.click()
        nescheck = nested_check()
    # After the last click there is no arrow left; the final item is taken
    # from the last list entry of the most recently read container.
    save_multiple(prefix + '.' + str(count_img), elem_img, last_img_flag=1)
def save_content(class_name, img_name):
    """Download the video or image inside an element and write it to a file.

    Args:
        class_name: Class of the element whose inner HTML holds the media tag.
        img_name: Path of the file to write.

    If the element is missing, or holds neither a ``<video>`` nor an
    ``<img>`` tag, a message is printed and nothing is written.
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    time.sleep(0.5)

    try:
        pic = chrome.find_element(By.CLASS_NAME, class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        return

    soup = bs(pic.get_attribute('innerHTML'), 'html.parser')
    # A <video> tag takes precedence over an <img> tag.
    media = soup.find('video') or soup.find('img')
    if media is None:
        # Without this guard, indexing None would raise TypeError and abort
        # the whole download run.
        print("No image or video found in this post")
        return

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(media['src'], timeout=30)

    with open(img_name, 'wb') as f:
        f.write(response.content)

    time.sleep(0.9)
      
def save_multiple(img_name, elem, last_img_flag=False):
    """Download one item of a multi-item post and write it to *img_name*.

    Args:
        img_name: Path of the file to write.
        elem: Element whose inner HTML contains a ``<ul>`` of ``<li>`` items.
        last_img_flag: When truthy, take the last ``<li>``; otherwise take
            the one at the middle index of the list.
    """
    time.sleep(1)

    markup = bs(elem.get_attribute('innerHTML'), 'html.parser')
    slides = markup.find_all('ul')[0].find_all('li')

    position = -1 if last_img_flag else len(slides) // 2
    slide = slides[position]

    # A <video> tag takes precedence over an <img> tag.
    clip = slide.find('video')
    source = clip['src'] if clip else slide.find('img')['src']

    payload = requests.get(source)
    with open(img_name, 'wb') as out:
        out.write(payload.content)
def nested_check():
    """Return the ``coreSpriteRightChevron`` element, or 0 when it is absent.

    Callers click the returned element to advance to the next item of a
    multi-item post, and treat the falsy 0 as "no further item".
    """
    # find_element_by_class_name was removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    try:
        time.sleep(1)
        # The locator is given without trailing spaces: whitespace is not
        # part of a class name.
        return chrome.find_element(By.CLASS_NAME, 'coreSpriteRightChevron')
    except selenium.common.exceptions.NoSuchElementException:
        return 0
# Driver code: start the browser, open the profile, log in and download.
path()
try:
    time.sleep(1)
    url_name(url)
    login(username, password)
    download_allposts()
finally:
    # quit() ends the whole WebDriver session (close() only closes the
    # current window), and the finally clause runs it even if a step fails.
    chrome.quit()

Após executar este script completo, será criado um diretório que conterá todos os posts.

Resultado:

Nota: se você é usuário do Windows, as postagens serão salvas com a extensão .file; abra-as com um aplicativo capaz de abrir tanto imagens quanto vídeos (as postagens do Instagram possuem apenas dois tipos de mídia: imagem ou vídeo).

Acervo Lima

O maior acervo de tutoriais e referências

Baixar postagens do Instagram usando o módulo Python Selenium

Requisitos:

Abordagem passo a passo:

Latest posts

Requisitos:

Abordagem passo a passo:

Latest posts

Most popular posts