# Untitled
#
# 🧩 Syntax:
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import pandas as pd
import time 


def download_data(year, month):
    """
    Fill in the customs.go.jp statistics search form and click its download button.

    A Chrome session is opened, the search form is filled with the given
    year/month and a fixed list of commodity codes, the search is submitted,
    and the ``dl`` button on the result page is clicked. The browser is always
    shut down afterwards, including when a step fails.

    Parameters
    ----------
    year : str
        Year as a string, e.g. "2017". Used as the option value of the year
        drop-down.
    month : str
        Numerical month as a string, e.g. "6" or "11". Used as the option
        value of the month drop-down.

    Returns
    -------
    None.

    """
    # All form controls live under this container inside the form document.
    form_root = "/html/body/form/div[1]/div[3]/div/div"
    custom_codes = "270900100 270900900 271019162 271019164 271019166 271019169 271019172 271019174 271019179"

    driver = webdriver.Chrome()
    try:
        driver.get("https://www.customs.go.jp/toukei/srch/indexe.htm?M=01&P=0")

        # Wait for the outer frame and switch into it, then switch into the
        # first frame of the frameset found there.
        # NOTE(review): presumably the page nests one frameset inside another;
        # confirm against the live page.
        WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it((By.XPATH, "/html/frameset/frame"))
        )
        frame = driver.find_element(By.XPATH, "/html/frameset/frame[1]")
        driver.switch_to.frame(frame)

        # Second <input> of the second paragraph.
        # NOTE(review): which option this selects is not visible from the code.
        driver.find_element(By.XPATH, f"{form_root}/p[2]/input[2]").click()

        dropdown_year = Select(driver.find_element(By.XPATH, f"{form_root}/p[3]/select[2]"))
        dropdown_year.select_by_value(year)

        dropdown_month = Select(driver.find_element(By.XPATH, f"{form_root}/p[3]/select[3]"))
        dropdown_month.select_by_value(month)

        # NOTE(review): meaning of option value "2" is site-defined; verify.
        dropdown_designation = Select(driver.find_element(By.XPATH, f"{form_root}/p[4]/select"))
        dropdown_designation.select_by_value("2")

        # Replace whatever is in the code field with the space-separated codes.
        text_form = driver.find_element(By.XPATH, f"{form_root}/p[5]/input")
        text_form.clear()
        text_form.send_keys(custom_codes)

        driver.find_element(By.XPATH, "//input[contains(@value,'Search')]").click()

        # After submitting, step into the first frame of the result frameset.
        frame = driver.find_element(By.XPATH, "/html/frameset/frame[1]")
        driver.switch_to.frame(frame)

        # NOTE(review): the browser is shut down right after this click; if the
        # download has not completed by then it may be cut short - confirm.
        driver.find_element(By.XPATH, "//input[@name='dl']").click()
    finally:
        # Always end the session so a failed step does not leave Chrome running.
        driver.quit()
    
    
def get_df(year, month):
    """
    Load the customs.go.jp result page for one month and return its table.

    The query (fixed commodity codes plus the given year and month) is encoded
    directly in the URL. The second table inside the result form is read, its
    header texts are checked against the expected layout, and its body rows
    are returned under flattened column names.

    Parameters
    ----------
    year :
        Year, interpolated into the query URL (e.g. "2023").
    month :
        Month, interpolated into the query URL (e.g. "6" or "11").

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` after the first, with the columns COUNTRY, UNIT1,
        UNIT2, QUANTITY1, QUANTITY2, VALUE, CUMULATIVEQUANTITY1,
        CUMULATIVEQUANTITY2 and CUMULATIVEVALUE.

    Raises
    ------
    AssertionError
        If the header cells of the table do not match the expected layout.

    """
    url = f"https://www.customs.go.jp/toukei/srch/indexe.htm?M=01&P=1,2,,,,,,,,1,0,{year},0,{month},0,2,270900100,270900900,271019162,271019164,271019166,271019169,271019172,271019174,271019179,,1,,,,,,,,,,,,,,,,,,,,,,50"
    # The original code built its first wait with
    # presence_of_element_located(By.XPATH, path) - two arguments instead of
    # one locator tuple - so that attempt always raised and only this
    # table[2] lookup ever ran. Wait for that element explicitly instead.
    table_path = '/html/body/div[1]/div[3]/div/div/form/div/table[2]'

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        WebDriverWait(driver, 20).until(
            EC.frame_to_be_available_and_switch_to_it((By.XPATH, "/html/frameset/frame"))
        )
        table = WebDriverWait(driver, 45).until(
            EC.presence_of_element_located((By.XPATH, table_path))
        )
        # NOTE(review): short settle pause kept from the original in case the
        # rows are still being filled in after the table appears - confirm
        # whether it is needed.
        time.sleep(2)

        # Read everything while the session is still alive; the elements
        # cannot be queried once the driver has quit.
        headers = [header.text.strip() for header in table.find_elements(By.TAG_NAME, "th")]
        # Skip the first <tr>; a row without <td> cells yields an empty list.
        rows = [
            [cell.text.strip() for cell in row.find_elements(By.TAG_NAME, "td")]
            for row in table.find_elements(By.TAG_NAME, "tr")[1:]
        ]
    finally:
        # quit() ends the whole session (close() only closes the window), and
        # the finally makes sure this also happens when a step above fails.
        driver.quit()

    expected_headers = [
        'COUNTRY',
        'UNIT1',
        'UNIT2',
        'CURRENT MONTH',
        'CUMULATIVE YEAR TO DATE',
        'QUANTITY1',
        'QUANTITY2',
        'VALUE',
        'QUANTITY1',
        'QUANTITY2',
        'VALUE',
    ]
    # Explicit raise instead of `assert` so the check survives `python -O`.
    if headers != expected_headers:
        raise AssertionError(
            f"unexpected table headers: {headers!r} (expected {expected_headers!r})"
        )

    # Flattened names: the group headers are dropped and the second set of
    # quantity/value columns is prefixed with CUMULATIVE.
    columns = [
        'COUNTRY',
        'UNIT1',
        'UNIT2',
        'QUANTITY1',
        'QUANTITY2',
        'VALUE',
        'CUMULATIVEQUANTITY1',
        'CUMULATIVEQUANTITY2',
        'CUMULATIVEVALUE',
    ]
    return pd.DataFrame(rows, columns=columns)

def month_year_iter( start_month, start_year, end_month, end_year ):
    """
    Yield ``(year, month)`` pairs for consecutive calendar months.

    Iteration starts at ``start_month``/``start_year`` and stops just before
    ``end_month``/``end_year``: the end month itself is NOT yielded. Months
    are 1-based (1 = January). Nothing is yielded when the end is not after
    the start.
    """
    # Number every month since year 0 so the span becomes a plain integer range.
    first_index = 12*start_year + start_month - 1
    stop_index = 12*end_year + end_month - 1
    yield from (
        (month_index // 12, month_index % 12 + 1)
        for month_index in range(first_index, stop_index)
    )
# Initial values only; both names are rebound by the loop below.
year = "2017"
month = "6"

# Fetch one table per month from 12/2023 up to (not including) 4/2024 and
# save each as <YYYY><MM>.csv in the result folder.
for year, month in month_year_iter(12, 2023, 4, 2024):
    year, month = str(year), str(month)
    # The query uses the unpadded month; only the file name is zero-padded.
    df = get_df(year, month)
    month = month.zfill(2)
    df.to_csv(rf'C:\Users\Priyesh\Downloads\result\\{year}{month}.csv')