import pandas as pd import numpy as np import matplotlib.pyplot as plt from openpyxl import Workbook from openpyxl import load_workbook from openpyxl.drawing.image import Image from PIL import Image as PILImage from datetime import datetime import win32com.client import win32timezone import os import time from scipy import stats # Outlook constants olFolderInbox = 6 olMailItem = 0 # Your email subject to identify the email with the attachment email_subject = "Query 'Customer report' results" # Path to save the attachment attachment_save_path = os.path.join(os.getcwd(), 'attachment.xls') # Initialize Outlook outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI") # Get the inbox folder inbox = outlook.GetDefaultFolder(olFolderInbox) # Variables to keep track of the most recent email and its received time target_email = None latest_received_time = None # Loop through emails and find the one with the correct subject for email in inbox.Items: print(f"Checking email subject: '{email.Subject}'") if email.Subject == email_subject: # Check if this email's received time is more recent than the previous if latest_received_time is None or email.ReceivedTime > latest_received_time: latest_received_time = email.ReceivedTime target_email = email # If the target email is found, save the attachment if target_email is not None: print(f"Number of attachments in email: {target_email.Attachments.Count}") if target_email.Attachments.Count > 0: for attachment in target_email.Attachments: if attachment.FileName.endswith('.xls'): attachment.SaveAsFile(attachment_save_path) # Save the attachment with the desired file name print(f"Attachment file name: {attachment_save_path}") break # Save only the first Excel attachment else: print("Attachment is not an Excel file.") else: print("No attachment found in the email.") else: print(f"Email with subject '{email_subject}' not found.") # Introducing a small delay time.sleep(2) # Assuming the attachment is an HTML table disguised as .xls if os.path.exists(attachment_save_path): tables = pd.read_html(attachment_save_path) if tables: df = tables[0] # Assuming the file contains only one table # Set the third row as the header new_header = df.iloc[0] df = df[1:] df.columns = new_header else: print("Could not read the attachment as an HTML table.") else: print("Attachment not found.") # Exit the script as the file was not found and `df` is not defined import sys sys.exit() # Data cleaning # Rename columns for ease of use new_columns = ['CID', 'Client', 'Customer', 'Nationality', 'Account manager', 'Customer duration', 'Diff. agents in last 7 days', 'Last 14 day avg.', 'Last 90 day avg.', '-0 day', '-1 day', '-2 day', '-3 day', '-4 day', '-5 day', '-6 day', '-7 day', '-8 day', '-9 day', '-10 day', '-11 day', '-12 day', '-13 day', '-14 day'] df.columns = new_columns # Replace '-' with 0 int_columns = ['-0 day', '-1 day', '-2 day', '-3 day', '-4 day', '-5 day', '-6 day', '-7 day', '-8 day', '-9 day', '-10 day', '-11 day', '-12 day', '-13 day', '-14 day', 'CID', 'Customer duration', 'Diff. agents in last 7 days'] df[int_columns] = df[int_columns].replace('-', 0) # Convert the int columns to integer data type df[int_columns] = df[int_columns].astype(int) # Convert columns to float float_columns = ['Last 14 day avg.', 'Last 90 day avg.'] df[float_columns] = df[float_columns].replace('-', 0) df[float_columns] = df[float_columns].astype(float) # Display updated data types print(df.dtypes) df print(df.info()) # Call decrease calculations # Calculate the decrease in percentage from "Last 90 day avg." to "Last 14 day avg." df['% Decrease 90- to 14 days avg.'] = ((df['Last 14 day avg.'] - df['Last 90 day avg.']) / df['Last 90 day avg.']) * 100 # Round the values to one decimal point df['% Decrease 90- to 14 days avg.'] = df['% Decrease 90- to 14 days avg.'].round(1) # Divide by 100 to get correct excel percentage formatting df['% Decrease 90- to 14 days avg.'] = df['% Decrease 90- to 14 days avg.'] / 100 # Export report filtered_data = df[ (df['% Decrease 90- to 14 days avg.'] < -0.30) & (~df['Client'].str.contains('leaddesk', case=False, na=False)) ] # Sort the data by 'Nationality' and then by '% Decrease 90- to 14 days avg.' ascending (highest decrease to lowest) sorted_data = filtered_data.sort_values(by=['Nationality', '% Decrease 90- to 14 days avg.'], ascending=[True, True]) # Select the desired columns for the report report_data = sorted_data[['Client', 'Nationality', '% Decrease 90- to 14 days avg.']] # Generate a file name with datetime and minutes current_datetime = datetime.now().strftime('%Y-%m-%d_%H-%M') output_file_path = f'customers_to_meet_report_{current_datetime}.xlsx' # Export the report_data DataFrame to an Excel file with the generated file name report_data.to_excel(output_file_path, index=False) # Load the Excel file you just saved using openpyxl wb = load_workbook(output_file_path) # Select the active sheet ws = wb.active # Import the NamedStyle and PatternFill from openpyxl.styles from openpyxl.styles import NamedStyle, PatternFill from openpyxl.comments import Comment # Create a named style for percentage percent_style = NamedStyle(name="percent", number_format="0%") # Create a grey fill pattern grey_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid") for row in range(2, ws.max_row + 1): # start from the second row to skip header cell = ws.cell(row=row, column=3) # 3 indicates the third column which is 'C' cell.style = percent_style # Grey out rows with -100% decrease if cell.value == -1: # -1 in Excel represents -100% when formatted as a percentage for cell in ws[row]: cell.fill = grey_fill # Add a comment at the end of the report explaining the -100% rows comment_text = "Rows with -100% indicate clients who might no longer be active." comment = Comment(comment_text, "System") ws["A1"].comment = comment # Adds the comment to the top-left cell of the report # Freeze header ws.freeze_panes = "A2" def interpolate_color(minval, maxval, val, color1, color2): """ Interpolate between color1 and color2 based on the position of val between minval and maxval. """ ratio = (val - minval) / (maxval - minval) r = int(color1[0] * (1 - ratio) + color2[0] * ratio) g = int(color1[1] * (1 - ratio) + color2[1] * ratio) b = int(color1[2] * (1 - ratio) + color2[2] * ratio) return r, g, b # Yellow and Red colors in RGB yellow_rgb = (255, 255, 0) red_rgb = (255, 0, 0) for row in range(2, ws.max_row + 1): cell = ws.cell(row=row, column=3) # 3 is for column C if -1 < cell.value < -0.3: # -1 is -100% and -0.3 is -30% # Get the interpolated color r, g, b = interpolate_color(-1, -0.3, cell.value, red_rgb, yellow_rgb) # Create the fill pattern with the interpolated color fill = PatternFill(start_color=f"{r:02X}{g:02X}{b:02X}", end_color=f"{r:02X}{g:02X}{b:02X}", fill_type="solid") # Apply the fill to the cell cell.fill = fill for column in ws.columns: max_length = 0 column = [cell for cell in column] for cell in column: try: if len(str(cell.value)) > max_length: max_length = len(cell.value) except: pass adjusted_width = (max_length + 2) # added some padding ws.column_dimensions[cell.column_letter].width = adjusted_width # Save the modified Excel file wb.save(output_file_path)