import csv
import os
# Source and destination CSV paths. Both are relative, so they resolve
# against the working directory the script is launched from.
# The input file name translates to "business systems"; the output is
# presumably meant for import into a Dify knowledge base (per its name).
input_csv, output_csv = (
    'ai_agents/firewall_governance/业务系统.csv',
    'ai_agents/firewall_governance/dify_knowledge_base.csv',
)
# Output column name -> source column name, in output order.
_COLUMN_MAP = {
    'System Name': 'businessSysName',
    'Abbreviation': 'abbreviation',
    'Alias': 'businessSysAlias',
    'Description': 'desc',
}


def _clean_field(row, key):
    """Return ``row[key]`` with surrounding whitespace removed, or ``''``.

    ``csv.DictReader`` fills columns that are missing from a short row with
    ``None`` (its default ``restval``), so ``row.get(key, '')`` alone would
    still hand back ``None`` and ``.strip()`` would raise.
    """
    return (row.get(key) or '').strip()


def clean_csv(input_path=None, output_path=None):
    """Select, rename and trim the columns of the input CSV and save the result.

    Each input row is reduced to the four columns listed in ``_COLUMN_MAP``
    (renamed to the mapping's keys, values stripped of surrounding
    whitespace). Rows whose system name is empty after stripping are dropped.
    The surviving rows are written, with a header, to the output CSV, and a
    short summary plus up to five sample records are printed.

    Args:
        input_path: CSV file to read. Defaults to the module-level
            ``input_csv`` when ``None``.
        output_path: CSV file to write. Defaults to the module-level
            ``output_csv`` when ``None``.

    Returns:
        None. Problems (missing input file, no usable rows, I/O or parsing
        errors) are reported on stdout instead of being raised.
    """
    source = input_csv if input_path is None else input_path
    target = output_csv if output_path is None else output_path

    if not os.path.exists(source):
        print(f"Error: File {source} not found.")
        return

    try:
        data = []
        # 'utf-8-sig' drops a leading BOM when present (otherwise the first
        # header would be read as '\ufeffbusinessSysName') and reads plain
        # UTF-8 unchanged. newline='' lets the csv module handle line
        # endings embedded in quoted fields itself.
        with open(source, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                clean_row = {
                    out_name: _clean_field(row, src_name)
                    for out_name, src_name in _COLUMN_MAP.items()
                }
                # A record without a system name is of no use downstream.
                if clean_row['System Name']:
                    data.append(clean_row)

        if not data:
            print("No data found or processed.")
            return

        with open(target, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=list(_COLUMN_MAP))
            writer.writeheader()
            writer.writerows(data)

        print(f"Successfully processed {len(data)} records.")
        print(f"Saved to: {target}")

        # Print a small sample so the result can be eyeballed.
        for record in data[:5]:
            print(record)
    except Exception as e:
        # Script-level boundary: report the failure rather than crash.
        print(f"An error occurred: {e}")
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    clean_csv()