#!/bin/sh
#
# Build a plain-text emoji list ("<emoji> <description>" per line) from the
# Unicode emoji-test.txt data file.
#
# Environment:
#   XDG_DATA_HOME  base data directory (defaults to $HOME/.local/share)
#
# Output:
#   ${XDG_DATA_HOME:-$HOME/.local/share}/thesiah/char/emoji
#
# Exit status: 0 on success, 1 if the download or the processing fails.

set -eu

# Define input and output files
URL="https://unicode.org/Public/emoji/latest/emoji-test.txt"
DATA_DIR="${XDG_DATA_HOME:-${HOME}/.local/share}/thesiah/char"
INPUT_FILE="${DATA_DIR}/emoji_raw"
TEMP_FILE="${DATA_DIR}/emoji_temp"
OUTPUT_FILE="${DATA_DIR}/emoji"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the intermediate files on every exit path (success or failure).
cleanup() {
  rm -f -- "$INPUT_FILE" "$TEMP_FILE"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Create the directory for output files if it doesn't exist
mkdir -p -- "$DATA_DIR" || die "Failed to create directory: $DATA_DIR"

# Download the emoji file.
# -f makes curl fail on HTTP errors instead of saving the error page as data.
echo "Downloading emoji-test.txt from Unicode..."
if curl -f -L -o "$INPUT_FILE" "$URL"; then
  echo "Download complete! File saved to: $INPUT_FILE"
else
  die "Failed to download emoji"
fi

# Filter the raw data and extract "<emoji> <description>" in a single pass.
# The result is built in TEMP_FILE and only moved over OUTPUT_FILE on success,
# so a failed run never truncates an existing emoji list.
awk '
  # Skip empty lines and comments
  /^[[:space:]]*$/ || /^#/ { next }

  # Keep only lines whose status is fully-qualified or component
  !/(fully-qualified|component)/ { next }

  # Skip lines containing 200D (zero-width joiner)
  /200D/ { next }

  # Skip sequences whose second code point is a skin-tone modifier
  # (U+1F3FB..U+1F3FF); $2 is the second whitespace-separated field.
  $2 ~ /1F3F[BCDEF]/ { next }

  {
    # Everything after the FIRST "#" is "<emoji> E<version> <description>".
    # Cutting at the first "#" (rather than splitting the record on "#")
    # keeps entries such as "keycap: #" intact, where the emoji and the
    # description themselves contain a "#".
    hash = index($0, "#")
    if (hash == 0) {
      next
    }
    full_data = substr($0, hash + 1)
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", full_data)

    # Use the return value of split: length(array) is not portable awk.
    n = split(full_data, parts, " ")
    if (n < 1) {
      next
    }

    emoji = parts[1]

    # parts[2] is the emoji version tag (e.g. E0.6); the description is
    # every remaining word joined by single spaces.
    description = ""
    sep = ""
    for (i = 3; i <= n; i++) {
      description = description sep parts[i]
      sep = " "
    }

    # Print emoji and description
    print emoji, description
  }
' "$INPUT_FILE" >"$TEMP_FILE" || die "Failed to process emoji data"

mv -f -- "$TEMP_FILE" "$OUTPUT_FILE" || die "Failed to write: $OUTPUT_FILE"

echo "Processing complete! File saved to: $OUTPUT_FILE"