"""Batch-clean Excel workbooks found in the current directory.

For every ``.xls`` / ``.xlsx`` file in the working directory, the first sheet
is read without a header row, the leading rows are dropped, rows whose first
column equals the total marker are removed, and the result is written next to
the input as ``<name>_cleaned.xlsx``.
"""

import glob
import os

import pandas as pd

# Number of leading rows dropped from every sheet before filtering.
HEADER_ROWS = 9
# Rows whose first column equals this value are removed
# (presumably a totals row -- confirm with the data owner).
TOTAL_MARKER = "TỔNG"
# Suffix appended to the input's base name to build the output name.
CLEANED_SUFFIX = "_cleaned"
# Prefix of the owner/lock files Excel creates while a workbook is open.
EXCEL_LOCK_PREFIX = "~$"


def should_process(file_path: str) -> bool:
    """Return True if *file_path* is a real input workbook.

    Excel lock files (``~$name.xlsx``) are not readable workbooks, and files
    whose base name already ends in ``_cleaned`` are outputs of a previous run;
    processing them again would strip another block of rows from clean data.
    """
    name = os.path.basename(file_path)
    if name.startswith(EXCEL_LOCK_PREFIX):
        return False
    stem = os.path.splitext(name)[0]
    return not stem.endswith(CLEANED_SUFFIX)


def find_input_files() -> list:
    """Return the ``.xls`` and ``.xlsx`` inputs in the current directory.

    The result is sorted so the processing order (and log output) is
    deterministic across runs and platforms.
    """
    candidates = glob.glob("*.xls") + glob.glob("*.xlsx")
    return sorted(path for path in candidates if should_process(path))


def output_path_for(file_path: str) -> str:
    """Return the output file name for *file_path* (``<base>_cleaned.xlsx``).

    NOTE: the extension is discarded, so ``data.xls`` and ``data.xlsx`` map to
    the same output name and the later one overwrites the earlier one.
    """
    base_name = os.path.splitext(file_path)[0]  # e.g. "data"
    return f"{base_name}{CLEANED_SUFFIX}.xlsx"


def clean_dataframe(
    df: pd.DataFrame,
    header_rows: int = HEADER_ROWS,
    total_marker: str = TOTAL_MARKER,
) -> pd.DataFrame:
    """Drop the leading rows and the rows marked as totals.

    Args:
        df: Sheet contents as read with ``header=None``.
        header_rows: Number of leading rows to remove.
        total_marker: Rows whose first column equals this value are removed.

    Returns:
        The remaining rows; the original index labels are kept.
    """
    body = df.iloc[header_rows:, :]
    # An empty sheet has no first column to inspect; indexing it would raise
    # IndexError, so return the (empty) frame as-is.
    if body.shape[1] == 0:
        return body
    return body[body.iloc[:, 0] != total_marker]


def main() -> None:
    """Clean every input workbook in the current directory."""
    for file_path in find_input_files():
        print(f"Processing: {file_path}")

        # Read the first sheet; header=None keeps every row as data.
        df = pd.read_excel(file_path, sheet_name=0, header=None)
        df = clean_dataframe(df)

        output_file = output_path_for(file_path)
        df.to_excel(output_file, index=False, header=False)
        print(f" --> Cleaned file saved as: {output_file}")


if __name__ == "__main__":
    main()