"""clean_mshr19_NV.py: strip leading rows and total rows from Excel workbooks.

For every ``.xls`` / ``.xlsx`` workbook in the current working directory the
script reads the first sheet without a header, drops the first 9 rows, drops
every row whose first column holds the label "TỔNG", and writes the remaining
rows to ``<name>_cleaned.xlsx`` next to the input (no index, no header).
"""

import glob
import os
import unicodedata

import pandas as pd

# Number of leading rows removed from every sheet.
HEADER_ROWS = 9

# Suffix appended to the input's base name to build the output file name.
CLEANED_SUFFIX = "_cleaned"

# First-column label marking a row to drop. Stored in NFC so it can be
# compared against cell text after the same normalization.
TOTAL_LABEL = unicodedata.normalize("NFC", "TỔNG")


def is_total_label(value):
    """Return True when *value* is the "TỔNG" label.

    Non-string cells (numbers, NaN, dates) never match. String cells are
    NFC-normalized and stripped first, so a decomposed spelling of the label
    or one padded with whitespace is still recognized.
    """
    if not isinstance(value, str):
        return False
    return unicodedata.normalize("NFC", value).strip() == TOTAL_LABEL


def clean_frame(df, header_rows=HEADER_ROWS):
    """Return *df* without its first *header_rows* rows and without total rows.

    Args:
        df: DataFrame as read from the sheet (no header row consumed).
        header_rows: how many leading rows to discard; defaults to 9.

    Returns:
        A DataFrame with the remaining rows; may be empty.
    """
    body = df.iloc[header_rows:, :]
    # A sheet with no columns has no "column A" to inspect; indexing it
    # would raise IndexError, so return the (empty) frame unchanged.
    if body.shape[1] == 0:
        return body
    # astype(bool) keeps the mask boolean even when no rows are left, so the
    # indexing below always filters rows.
    is_total = body.iloc[:, 0].map(is_total_label).astype(bool)
    return body[~is_total]


def is_input_file(file_path):
    """Return True when *file_path* should be processed.

    Two kinds of matches from the glob are rejected:
    * Excel lock files (``~$name.xlsx``), which are not readable workbooks;
    * files whose base name already ends in ``_cleaned`` - these are outputs
      of an earlier run, and re-processing them would strip further rows and
      pile up ``_cleaned_cleaned`` files.
    """
    name = os.path.basename(file_path)
    if name.startswith("~$"):
        return False
    stem = os.path.splitext(name)[0]
    return not stem.endswith(CLEANED_SUFFIX)


def output_path_for(file_path):
    """Return the output path for *file_path*: ``<base>_cleaned.xlsx``."""
    base_name = os.path.splitext(file_path)[0]  # e.g. "data"
    return f"{base_name}{CLEANED_SUFFIX}.xlsx"


def find_input_files():
    """Return the .xls and .xlsx files in the current directory to process."""
    candidates = glob.glob("*.xls") + glob.glob("*.xlsx")
    return [path for path in candidates if is_input_file(path)]


def main():
    """Clean every eligible workbook in the current directory."""
    for file_path in find_input_files():
        print(f"Processing: {file_path}")
        # Read the first sheet; header=None keeps every row as data.
        df = pd.read_excel(file_path, sheet_name=0, header=None)

        cleaned = clean_frame(df)

        output_file = output_path_for(file_path)
        cleaned.to_excel(output_file, index=False, header=False)
        print(f" --> Cleaned file saved as: {output_file}")


if __name__ == "__main__":
    main()