Write

【Python】ExcelからCSVに書き出す｜pandas,csv

作成日：2025-09-27

更新日：2025-10-01

pandasで読み込み
csvで書き出す

`pandas` と `csv` モジュールを組み合わせる

pandas や csv を import して使う

import pandas as pd
import csv
import os
from datetime import datetime
from typing import List, Optional

class ExcelToCSVExporter:
    """Export selected columns of one Excel sheet to a UTF-8 (with BOM) CSV file.

    The sheet is read without a header row and with every cell as a string,
    so sheet row 1 is DataFrame position 0 and values such as leading zeros
    are kept as written.
    """

    def __init__(self, excel_file_path: str, sheet_name: str = "対象シート", use_all_columns_for_last_row: bool = False):
        """Store the source location and the export settings.

        Args:
            excel_file_path: Path of the workbook to read.
            sheet_name: Name of the sheet to export.
            use_all_columns_for_last_row: When True, the last row is the last
                row holding a value in any column; otherwise only the
                ``COL_LASTROW_BASE`` column is inspected.
        """
        self.excel_file_path = excel_file_path
        self.sheet_name = sheet_name
        self.use_all_columns_for_last_row = use_all_columns_for_last_row

        # Layout constants (1-based row numbers, Excel column letters).
        self.COL_LASTROW_BASE = "A"  # column that decides the last row
        self.ROW_HEADER = 1          # header row number
        self.ROW_DATA_START = 2      # first data row number

        # Excel column letters written to the CSV, in output order.
        self.fields = ["A", "C", "E", "F", "G"]

    def export_to_csv(self, output_path: Optional[str] = None) -> str:
        """Read the configured sheet and write the selected columns as CSV.

        Args:
            output_path: Destination file path. When None, a timestamped
                path next to the workbook is generated.

        Returns:
            The path of the CSV file that was written.

        Raises:
            ValueError: If the sheet is empty or no last row can be found.
            Exception: Any error from reading or writing is printed and
                re-raised unchanged.
        """
        try:
            # dtype=str keeps leading zeros; missing cells become "".
            df = pd.read_excel(
                self.excel_file_path,
                sheet_name=self.sheet_name,
                header=None,
                dtype=str,
                engine="openpyxl"
            ).fillna("")

            if df.empty:
                raise ValueError("シートにデータがありません")

            last_row = self._get_last_row(df)

            if output_path is None:
                output_path = self._generate_output_path()

            self._save_csv_with_bom(df, last_row, output_path)

            print(f"CSVファイルを出力しました: {output_path}")
            return output_path

        except Exception as e:
            print(f"エラーが発生しました: {str(e)}")
            raise

    @staticmethod
    def _last_true_position(mask: pd.Series) -> Optional[int]:
        """Return the 0-based position of the last True in *mask*, or None."""
        hits = mask.to_numpy(dtype=bool).nonzero()[0]
        if hits.size == 0:
            return None
        return int(hits[-1])

    def _get_last_row(self, df: pd.DataFrame) -> int:
        """Return the 1-based number of the last row that holds data.

        A cell counts as filled when it is neither missing nor the empty
        string. The empty-string check matters because the caller has
        already replaced missing cells with "", which makes a NaN-based
        check such as ``last_valid_index`` match every row.

        Args:
            df: Sheet contents, one DataFrame row per sheet row.

        Returns:
            The 1-based row number of the last filled row.

        Raises:
            ValueError: If no filled cell is found.
        """
        if self.use_all_columns_for_last_row:
            # Any column: a row counts when at least one cell is filled.
            filled = (df.notna() & (df != "")).any(axis=1)
            last_pos = self._last_true_position(filled)
            if last_pos is None:
                raise ValueError("データがありません")
        else:
            # Base column only (column A unless COL_LASTROW_BASE is changed).
            base_idx = self._column_letter_to_index(self.COL_LASTROW_BASE)
            base_col = df.iloc[:, base_idx]
            filled = base_col.notna() & (base_col != "")
            last_pos = self._last_true_position(filled)
            if last_pos is None:
                raise ValueError(f"{self.COL_LASTROW_BASE}列に有効データがありません")
        return last_pos + 1  # position -> 1-based row number

    def _column_letter_to_index(self, column_letter: str) -> int:
        """Convert an Excel column letter to a 0-based column index.

        Args:
            column_letter: Column letters such as "A", "Z" or "AA".
                Lowercase letters and surrounding whitespace are accepted.

        Returns:
            The 0-based index ("A" -> 0, "Z" -> 25, "AA" -> 26).

        Raises:
            ValueError: If the value is empty or contains anything other
                than ASCII letters.
        """
        letters = column_letter.strip().upper()
        if not letters or not (letters.isascii() and letters.isalpha()):
            raise ValueError(f"列記号が不正です: {column_letter!r}")
        result = 0
        for char in letters:
            # Base-26 with digits 1..26 (there is no zero digit).
            result = result * 26 + (ord(char) - ord("A") + 1)
        return result - 1

    def _generate_output_path(self) -> str:
        """Build a timestamped CSV path in the workbook's directory.

        Returns:
            ``<dir>/<workbook name>_export_<YYYYmmdd_HHMMSS>.csv``.
        """
        excel_dir = os.path.dirname(self.excel_file_path)
        excel_name = os.path.splitext(os.path.basename(self.excel_file_path))[0]
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        output_filename = f"{excel_name}_export_{timestamp}.csv"
        return os.path.join(excel_dir, output_filename)

    def _save_csv_with_bom(self, df: pd.DataFrame, last_row: int, output_path: str):
        """Write the header row through *last_row* of the selected columns.

        The file is encoded as UTF-8 with a BOM (``utf-8-sig``). Rows are
        handed to ``csv.writer`` one tuple at a time, so no second full copy
        of the output is built; the DataFrame itself is already in memory.

        Args:
            df: Sheet contents, one DataFrame row per sheet row.
            last_row: 1-based number of the last row to write (inclusive).
            output_path: Destination file path.

        Raises:
            IndexError: If a column in ``self.fields`` lies beyond the
                sheet's last column.
        """
        column_indices = [self._column_letter_to_index(c) for c in self.fields]

        # Fail with a readable message instead of an opaque iloc error.
        missing = [c for c, i in zip(self.fields, column_indices) if i >= df.shape[1]]
        if missing:
            raise IndexError(f"シートに存在しない列が指定されています: {', '.join(missing)}")

        # iloc's end is exclusive, so the 1-based last_row is used as-is.
        start_r = self.ROW_HEADER - 1
        sub = df.iloc[start_r:last_row, column_indices]

        # newline='' lets the csv module control line endings itself.
        with open(output_path, 'w', encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(sub.itertuples(index=False, name=None))

def main():
    """Run a sample export and report the outcome on stdout.

    Exports the sheet "対象シート" of "sample.xlsx". A missing workbook and
    any other failure are each reported with their own message rather than
    propagated.
    """
    source_workbook = "sample.xlsx"  # workbook to read
    target_sheet = "対象シート"       # sheet to export

    try:
        result_path = ExcelToCSVExporter(source_workbook, target_sheet).export_to_csv()
        print(f"エクスポート完了: {result_path}")
    except FileNotFoundError:
        print(f"Excelファイルが見つかりません: {source_workbook}")
    except Exception as err:
        print(f"エラー: {str(err)}")


# Run the sample export only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

逐次書き込みでメモリ効率化って？

ループして１行ずつ書いていくこと

逐次書き込み（ループで１行ずつ書く）

# 'utf-8-sig' writes a BOM first; newline='' leaves line endings to csv.
with open(output_path, 'w', encoding='utf-8-sig', newline='') as f:
    writer = csv.writer(f)
    for row in sub.itertuples(index=False, name=None):
        writer.writerow(row)  # writes one CSV line per DataFrame row

CSVでは１行ずつ書き込む

書き込み時に追加で使うメモリは、行数が増えてもほぼ一定（ただし DataFrame 本体は読み込み時点でメモリに載っている）
Excelは配列貼り付け、CSVは１行ずつがベター

このコードの便利さを整理してみるよ

特徴１. タプル1個 = CSVの1行になる

タプルとは

Pythonの基本データ型で、順序付きで変更できない値の集まり
カンマ区切りでまとめるとタプルになる

例）

# A tuple groups values in a fixed order; items are read by position.
person = ("山田", 30)
print(person[0])  # prints 山田
print(person[1])  # prints 30

手順

itertuples → DataFrame を１行ずつタプルで返す
index=False → 行番号を含めない
name=None → 「純粋なタプルだけ」をもらえる（CSV出力に最適）
そのタプルを writer.writerow(row) に渡すと、CSVに書き込まれる

ループすれば１行ずつ取り出せる

import pandas as pd

# Two-row sample frame: one column of names, one of ages.
df = pd.DataFrame({
    "名前": ["山田", "鈴木"],
    "年齢": [30, 25]
})

# index=False drops the row label; name=None yields plain tuples.
for row in df.itertuples(index=False, name=None):
    print(row)

↓ 結果出力

('山田', 30)
('鈴木', 25)

特徴２. utf-8-sig を指定すれば BOM付きになる

encoding='utf-8-sig'

これだけでBOM付きUTF-8になる
Excelは配列貼り付け、CSVは１行ずつがベター

BOM付きUTF-8？

ファイル先頭にBOM（バイト順マーク）を付けたUTF-8のこと。ExcelでCSVを開いたときの日本語の文字化けを防げる

CLICK

BOM付きUTF-8で文字化けを防ぐ｜テキストファイル

特徴３. csv.writer がダブルクォート処理を自動でやってくれる

ダブルクォート処理

文字列に「カンマ・改行・ダブルクォート」が含まれている場合で、文字列として扱いたいときにはダブルクォートで囲う

ダブルクォートで囲う？

ダブルクォートで囲わないと、文字列中のカンマや改行が区切り文字として認識されてしまう

author
月うさぎ

2025-09-27

編集後記：
この記事の内容がベストではないかもしれません。