見出し画像

LINEのトーク履歴を,シートに変換する

今後,個人を再現するAIが普及する.個人を再現するにあたり,過去のLINEのトーク履歴を参照するのが最も手っ取り早い.
今回は,LINEからエクスポートしたトーク履歴(テキストファイル)を,日付・時刻・送信者・メッセージ種別・内容の列を持つシート(CSVファイル)に変換するPythonコードを共有する.

# Parse an exported LINE chat history into rows of date, time, sender, type and content.
# The sender pattern makes minimal assumptions about the name format: any characters except ':', tab and newline.

def read_and_parse_chat_file_flexibly(file_path):
    """Parse an exported LINE chat-history text file into a list of row dicts.

    The file is read line by line:

    * A line starting with ``YYYY/M/D(...)`` sets the date that applies to
      the message lines after it.
    * A line of the form ``H:MM<TAB>sender<TAB>text`` becomes one row.
      Message lines that appear before the first date line are skipped.
    * A line that is neither of the above, does not start with
      ``H:MM<TAB>``, and follows a text message is treated as a continuation
      of that message and appended to its content with a newline. Blank
      lines are kept only when more continuation text follows them.
      Characters such as quotes are kept exactly as they appear in the file.
    * Every other line (file header, lines starting with ``H:MM<TAB>`` that
      do not match the message pattern) is ignored.

    A message whose whole text is ``[スタンプ]`` or ``[写真]`` is typed as
    "Stamp" or "Photo"; anything else is "Text". A text message that merely
    mentions one of those markers stays "Text" and keeps its content.

    Args:
        file_path: Path of the text file to read (UTF-8, with or without BOM).

    Returns:
        A list of dicts with the keys "Date" (``datetime.date``), "Time"
        (the ``H:MM`` string as written in the file), "Sender",
        "Message Type" ("Text", "Stamp" or "Photo") and "Content" (the
        message text for "Text" rows, the placeholder "[Content]" otherwise).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a date line holds digits that do not form a valid date.
    """
    import re
    from datetime import datetime

    # Date header line, e.g. "2024/01/01(月)".
    date_regex = re.compile(r"(\d{4}/\d{1,2}/\d{1,2})\(.+?\)")
    # Message line: time, sender and text separated by tabs.
    message_regex = re.compile(r"(\d{1,2}:\d{2})\t([^:\t\n]+)\t(.+)")
    # Any line that begins a new timestamped entry, whether or not it parses
    # as a message; such a line must never be glued onto the previous message.
    time_prefix_regex = re.compile(r"\d{1,2}:\d{2}\t")
    # Placeholder texts that stand for non-text messages.
    type_by_marker = {"[スタンプ]": "Stamp", "[写真]": "Photo"}

    parsed_data = []
    current_date = None
    open_text_row = None      # last "Text" row that may still receive continuation lines
    pending_blank_lines = 0   # blank lines seen since the last continuation text

    # utf-8-sig also reads plain UTF-8 and drops a leading BOM if present.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            date_match = date_regex.match(line)
            if date_match:
                current_date = datetime.strptime(date_match.group(1), "%Y/%m/%d").date()
                open_text_row = None
                continue

            message_match = message_regex.match(line)
            if message_match and current_date:
                time_text = message_match.group(1)
                sender = message_match.group(2).strip()
                message = message_match.group(3)

                # Compare the whole text so that ordinary messages which only
                # mention a marker are not mistaken for media and blanked out.
                message_type = type_by_marker.get(message.strip(), "Text")

                row = {
                    "Date": current_date,
                    "Time": time_text,
                    "Sender": sender,
                    "Message Type": message_type,
                    "Content": message if message_type == "Text" else "[Content]",
                }
                parsed_data.append(row)
                open_text_row = row if message_type == "Text" else None
                pending_blank_lines = 0
                continue

            if time_prefix_regex.match(line):
                # A timestamped line that is not a parsable message ends the
                # previous message without contributing a row.
                open_text_row = None
                continue

            if open_text_row is None:
                continue

            # Continuation of a multi-line text message.
            text = line.rstrip("\n")
            if not text.strip():
                # Defer blank lines: they belong to the message only if more
                # text follows; otherwise they are just separators in the file.
                pending_blank_lines += 1
                continue
            open_text_row["Content"] += "\n" * (pending_blank_lines + 1) + text
            pending_blank_lines = 0

    return parsed_data

# Path to the exported LINE chat history file (example value; adjust as necessary).
file_path = "[LINE] xxxxとのトーク.txt"


import pandas as pd

# Build the table first and export it in a separate step: DataFrame.to_csv
# returns None when it is given a path, so chaining it into the assignment
# would leave `df` bound to None instead of the parsed data.
df = pd.DataFrame(read_and_parse_chat_file_flexibly(file_path))
# utf-8-sig writes a BOM at the start of the file; presumably chosen so that
# spreadsheet applications detect the encoding of the Japanese text.
df.to_csv('parsed_chat_history.csv', index=False, encoding='utf-8-sig')


この記事が気に入ったらサポートをしてみませんか?