# Untitled
# Syntax: Python
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import pandas as pd
import time
def download_data(year: str, month: str) -> None:
    """
    Fill in the search form at customs.go.jp/toukei and click its download button.

    A fresh Chrome session is opened for the call and is always shut down
    again, even when one of the page lookups fails.

    Parameters
    ----------
    year : str
        Year as a string, e.g. "2017"; selected by ``value`` in the year
        dropdown.
    month : str
        Numerical month as a string, e.g. "6" or "11"; selected by ``value``
        in the month dropdown.

    Returns
    -------
    None.
    """
    form_root = "/html/body/form/div[1]/div[3]/div/div"
    custom_codes = "270900100 270900900 271019162 271019164 271019166 271019169 271019172 271019174 271019179"

    driver = webdriver.Chrome()
    try:
        driver.get("https://www.customs.go.jp/toukei/srch/indexe.htm?M=01&P=0")

        # The page is a frameset: wait for the outer frame, switch into it,
        # then switch into the first frame of the frameset found inside it.
        WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it((By.XPATH, "/html/frameset/frame"))
        )
        frame = driver.find_element(By.XPATH, "/html/frameset/frame[1]")
        driver.switch_to.frame(frame)

        # NOTE(review): the role of this input is not visible from the code;
        # presumably it selects a search option -- verify against the page.
        driver.find_element(By.XPATH, form_root + "/p[2]/input[2]").click()

        dropdown_year = Select(driver.find_element(By.XPATH, form_root + "/p[3]/select[2]"))
        dropdown_year.select_by_value(year)
        dropdown_month = Select(driver.find_element(By.XPATH, form_root + "/p[3]/select[3]"))
        dropdown_month.select_by_value(month)

        # NOTE(review): the meaning of option value "2" is not shown here;
        # confirm against the page's designation dropdown.
        dropdown_designation = Select(driver.find_element(By.XPATH, form_root + "/p[4]/select"))
        dropdown_designation.select_by_value("2")

        # Replace whatever is in the text field with the space-separated codes.
        text_form = driver.find_element(By.XPATH, form_root + "/p[5]/input")
        text_form.clear()
        text_form.send_keys(custom_codes)

        submit_path = "//input[contains(@value,'Search')]"
        driver.find_element(By.XPATH, submit_path).click()

        # After the search is submitted, step into the first frame again
        # before looking for the download button.
        frame = driver.find_element(By.XPATH, "/html/frameset/frame[1]")
        driver.switch_to.frame(frame)

        download_path = "//input[@name='dl']"
        driver.find_element(By.XPATH, download_path).click()
        # NOTE(review): the browser is shut down right after this click;
        # confirm the download has enough time to complete.
    finally:
        # Always end the session so a failed lookup does not leave a stray
        # Chrome/chromedriver process behind.
        driver.quit()
def get_df(year: str, month: str) -> pd.DataFrame:
    """
    Scrape the search-result table for one year/month into a DataFrame.

    The query (year, month and the list of codes) is encoded directly in the
    page URL, so no form interaction is needed. A fresh Chrome session is
    opened for the call and is always shut down again.

    Parameters
    ----------
    year : str
        Year as a string, e.g. "2023"; interpolated into the URL.
    month : str
        Numerical month as a string, e.g. "6" or "11"; interpolated into the
        URL.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` of the result table after the first, built from
        the stripped text of its ``<td>`` cells, with the columns COUNTRY,
        UNIT1, UNIT2, QUANTITY1, QUANTITY2, VALUE, CUMULATIVEQUANTITY1,
        CUMULATIVEQUANTITY2 and CUMULATIVEVALUE.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the frame or the result table does not appear in time.
    ValueError
        If the table's header cells differ from the expected layout.
    """
    url = f"https://www.customs.go.jp/toukei/srch/indexe.htm?M=01&P=1,2,,,,,,,,1,0,{year},0,{month},0,2,270900100,270900900,271019162,271019164,271019166,271019169,271019172,271019174,271019179,,1,,,,,,,,,,,,,,,,,,,,,,50"

    # The previous version passed the locator to presence_of_element_located
    # as two positional arguments, which raises TypeError; a bare ``except``
    # hid that, so this table was the one actually scraped on every call.
    table_path = '/html/body/div[1]/div[3]/div/div/form/div/table[2]'

    expected_headers = [
        'COUNTRY',
        'UNIT1',
        'UNIT2',
        'CURRENT MONTH',
        'CUMULATIVE YEAR TO DATE',
        'QUANTITY1',
        'QUANTITY2',
        'VALUE',
        'QUANTITY1',
        'QUANTITY2',
        'VALUE',
    ]
    # The scraped headers are two-level (group cells plus repeated
    # QUANTITY1/QUANTITY2/VALUE); these flat, unique names replace them.
    columns = [
        'COUNTRY',
        'UNIT1',
        'UNIT2',
        'QUANTITY1',
        'QUANTITY2',
        'VALUE',
        'CUMULATIVEQUANTITY1',
        'CUMULATIVEQUANTITY2',
        'CUMULATIVEVALUE',
    ]

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        WebDriverWait(driver, 20).until(
            EC.frame_to_be_available_and_switch_to_it((By.XPATH, "/html/frameset/frame"))
        )

        # The expected condition takes ONE locator tuple.
        WebDriverWait(driver, 45).until(
            EC.presence_of_element_located((By.XPATH, table_path))
        )
        # Short settle pause, then look the table up again so the element
        # reference is taken after the pause rather than before it.
        time.sleep(2)
        table = driver.find_element(By.XPATH, table_path)

        headers = [
            header.text.strip()
            for header in table.find_elements(By.TAG_NAME, "th")
        ]
        # Skip the first <tr>; every later row contributes its <td> texts.
        # NOTE(review): a later row holding only <th> cells yields an empty
        # list here -- confirm how such rows should appear in the output.
        rows = [
            [cell.text.strip() for cell in row.find_elements(By.TAG_NAME, "td")]
            for row in table.find_elements(By.TAG_NAME, "tr")[1:]
        ]
    finally:
        # quit() ends the whole session; it runs even when scraping fails.
        driver.quit()

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if headers != expected_headers:
        raise ValueError(
            f"Unexpected table headers for {year}-{month}: {headers!r}"
        )

    return pd.DataFrame(rows, columns=columns)
def month_year_iter(start_month, start_year, end_month, end_year):
    """
    Yield ``(year, month)`` pairs for consecutive calendar months.

    Iteration starts at ``start_month``/``start_year`` and stops just before
    ``end_month``/``end_year``: the end month itself is not yielded. Months
    are 1-based (1 = January, 12 = December).
    """
    # Number each month as year * 12 + zero-based month so that the whole
    # span is a plain integer range.
    first_index = start_year * 12 + (start_month - 1)
    stop_index = end_year * 12 + (end_month - 1)
    for month_index in range(first_index, stop_index):
        yield month_index // 12, month_index % 12 + 1
# Scrape one table per month from December 2023 up to (not including)
# April 2024 and save each one as <YYYY><MM>.csv in the result folder.
output_prefix = r'C:\Users\Priyesh\Downloads\result\\'
for year, month in month_year_iter(12, 2023, 4, 2024):
    year, month = str(year), str(month)
    # The page is queried with the unpadded month ...
    df = get_df(year, month)
    # ... while the file name uses a two-digit month.
    month = month.zfill(2)
    df.to_csv(output_prefix + year + month + '.csv')