cl1p.net - The internet clipboard
CL1P.NET V4. Info
Login/Sign Up
cl1p.net/clip
cl1p.net/clip
CL1P.NET V4. Info
Login/Sign Up
This cl1p will be deleted in 18 days.
Copy
import datetime
import os
import re
import time

import pandas as pd
import mysql.connector
from selenium import webdriver
from selenium.webdriver.common.by import By


def scrape_data():
    """Scrape product names and prices from IndiaMART via a Chrome browser.

    Navigates Electronics & Electrical -> Office Automation Products,
    fills the "identify" pop-up form and dismisses it, opens the first
    product listing, then collects every product card's name, its price
    (with the trailing "Get Latest Price" label stripped), and the local
    timestamp at which it was scraped.

    Returns:
        tuple: (prod_name, prod_price, date_time_list) — three parallel
        lists of strings; timestamps use the "%Y-%m-%d %H:%M:%S" format.
    """
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        URL = "https://www.indiamart.com/"
        driver.get(URL)

        # Fixed sleeps are fragile against page-load timing; WebDriverWait
        # with expected_conditions would be more robust — kept simple here.
        electronics_category = driver.find_element(
            By.XPATH, "//span[text()='Electronics & Electrical']")
        electronics_category.click()
        time.sleep(5)

        office_automation_products = driver.find_element(
            By.XPATH,
            "//*[@id='mega-menu']/div/div/div[2]/div[2]/div[1]/div[1]/ul/li[1]/a/b")
        office_automation_products.click()
        time.sleep(5)

        # Fill the mobile-number "identify" form, then close the pop-up so
        # it no longer intercepts clicks on the product list.
        mobile_number_input = driver.find_element(
            By.XPATH, "//form[@id='identifiedForm']//input[@id='mbl_idn']")
        mobile_number_input.send_keys("9632587410")
        driver.find_element(By.ID, "logintoidentify").click()
        time.sleep(3)
        driver.find_element(By.ID, "close_s").click()

        # Absolute XPath is brittle — breaks on any page-layout change.
        product_shuttering_plywood = driver.find_element(
            By.XPATH,
            "/html/body/div[8]/div[3]/div/section[1]/ul/li[1]/div/div[1]/div[2]/div[1]/div[2]/div/a")
        product_shuttering_plywood.click()
        time.sleep(20)

        total_product = driver.find_elements(
            By.XPATH, "//div[@class='l-cl b-w ']")
        print("Total Product", len(total_product))

        prod_name = []
        prod_price = []
        date_time_list = []
        # XPath positional predicates are 1-based; the original
        # range(1, len(total_product)) silently skipped the LAST product.
        for product in range(1, len(total_product) + 1):
            prod_name_element = driver.find_elements(
                By.XPATH,
                "(//div[@class='rht pnt flx']//a/h3)[" + str(product) + "]")
            for element in prod_name_element:
                prod_name.append(element.text)

            prod_price_element = driver.find_elements(
                By.XPATH, "(//span[@class='prc cur'])[" + str(product) + "]")
            for element in prod_price_element:
                text = element.text
                # The price span's text ends with a "Get Latest Price"
                # call-to-action; keep only the amount before it.
                split_text = text.split("Get Latest Price")
                price_text = split_text[0].strip()
                prod_price.append(price_text)
                current_date_time = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
                date_time_list.append(current_date_time)
    finally:
        # Always release the browser, even if navigation/scraping raises;
        # the original leaked the Chrome process on any exception.
        driver.quit()
    return prod_name, prod_price, date_time_list


def save_to_excel(prod_name, prod_price, date_time_list):
    """Persist scraped rows to date_with_time_product.xlsx.

    Appends to the existing workbook when present, dropping duplicate
    (product_name, price) pairs; otherwise creates a fresh file.

    Args:
        prod_name: list of product-name strings.
        prod_price: list of price strings, parallel to prod_name.
        date_time_list: list of scrape timestamps, parallel to prod_name.
    """
    excel_file_path = "date_with_time_product.xlsx"
    df = pd.DataFrame({
        "product_name": prod_name,
        "price": prod_price,
        "Scraped Date Time": date_time_list,
    })
    if os.path.exists(excel_file_path):
        existing_data_df = pd.read_excel(excel_file_path)
        combined_df = pd.concat([existing_data_df, df], ignore_index=True)
        # Remove duplicates based on product_name and price
        combined_df.drop_duplicates(
            subset=["product_name", "price"], inplace=True)
        combined_df.to_excel(excel_file_path, index=False)
        print("Data successfully appended to Excel file without duplicates.")
    else:
        df.to_excel(excel_file_path, index=False)
        print("Data successfully saved to new Excel file.")


def create_table(cursor):
    """Create the product_detail table if it does not already exist.

    product_name is UNIQUE so that INSERT IGNORE in
    transfer_data_from_excel skips rows already stored.

    Args:
        cursor: an open mysql.connector cursor.
    """
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS product_detail (
            S_NO INT AUTO_INCREMENT PRIMARY KEY,
            product_name VARCHAR(200) UNIQUE,
            price VARCHAR(100),
            scraped_date_time DATETIME
        )
    """)


def transfer_data_from_excel(cursor, excel_file_path):
    """Load rows from the Excel workbook into the product_detail table.

    Truncates names to the column width (200 chars), strips everything but
    digits and dots from the price, and relies on INSERT IGNORE (MySQL
    extension) plus the UNIQUE product_name constraint to skip duplicates.
    Errors are reported, not re-raised — the transfer is best-effort.

    Args:
        cursor: an open mysql.connector cursor.
        excel_file_path: path of the workbook written by save_to_excel.
    """
    try:
        df = pd.read_excel(excel_file_path)
        print("Column Names:", df.columns)
        for index, row in df.iterrows():
            product_name = str(row['product_name'])[:200]
            # Keep only digits and '.' so "₹ 1,234 / Piece" becomes "1234".
            price = re.sub(r'[^\d.]', '', str(row['price']))
            scraped_date_time = str(row['Scraped Date Time'])
            # Parameterized query — values are never interpolated into SQL.
            insert_query = ("INSERT IGNORE INTO product_detail "
                            "(product_name, price, scraped_date_time) "
                            "VALUES (%s, %s, %s)")
            cursor.execute(insert_query, (product_name, price,
                                          scraped_date_time))
        print("Data transfer completed successfully.")
    except Exception as e:
        print("Error occurred during data transfer:", e)


def main():
    """Run the pipeline: scrape -> Excel -> MySQL, then commit and close."""
    # NOTE(review): credentials belong in environment variables or a config
    # file, not in source; left as-is pending a secrets strategy.
    connection = mysql.connector.connect(
        host='localhost',
        user='root',
        password='your_password',
        database='sys',
        auth_plugin="mysql_native_password",
    )
    try:
        cursor = connection.cursor()
        # Scrape data from the website
        prod_name, prod_price, date_time_list = scrape_data()
        # Save data to Excel
        save_to_excel(prod_name, prod_price, date_time_list)
        # Create table and transfer data from Excel to database
        create_table(cursor)
        transfer_data_from_excel(cursor, "date_with_time_product.xlsx")
        connection.commit()
        cursor.close()
    finally:
        # Close the connection even when scraping/transfer raises; the
        # original left it open on any error and also ran all of this as
        # an import-time side effect.
        connection.close()


if __name__ == "__main__":
    main()