Email Attachment Analyzer#

Learn how to automatically analyze email attachments using AI, extract content, and process different file types intelligently.

🎯 What You'll Build#

An email processing system that:

- Monitors an email inbox for new messages with attachments
- Automatically downloads and analyzes attachments
- Extracts text content and metadata from various file types
- Uses AI to analyze and categorize document content
- Routes documents based on analysis results
- Generates summaries and insights

📋 Requirements#

Email account with IMAP access
AI service API (OpenAI Vision, Document AI, etc.)
Cloud storage for file processing
n8n instance running

🔧 Workflow Overview#

View workflow file

Key Components#

Email Trigger - Monitors inbox for new messages
Attachment Handler - Downloads and processes files
Content Extractor - Extracts text and metadata
AI Analyzer - Analyzes content with AI models
Document Router - Routes based on analysis
Storage System - Organizes processed documents

📝 Step-by-Step Guide#

1. Set Up Email Monitoring#

Configure IMAP Connection

- Add an Email Read IMAP node
- Set up email account credentials
- Configure folder monitoring (INBOX or a specific folder)
- Set filters for messages with attachments

Email Filtering

// Filter emails with attachments
const hasAttachments = $json.attachments && $json.attachments.length > 0;
const isFromTrustedSender = $json.from.email.includes('yourdomain.com');
const isRecent = new Date($json.date) > new Date(Date.now() - 24 * 60 * 60 * 1000);

return hasAttachments && (isFromTrustedSender || isRecent);

2. Attachment Processing Pipeline#

Download Attachments

// Process each attachment
const attachments = $json.attachments.map(attachment => ({
  filename: attachment.filename,
  content: attachment.content,
  size: attachment.size,
  type: attachment.contentType,
  downloadUrl: attachment.url
}));

return attachments;

File Type Detection

/**
 * Categorize a file by its extension, falling back to its MIME type.
 *
 * @param {string} filename - Attachment file name, e.g. "report.pdf".
 * @param {string} [contentType] - MIME type reported for the attachment. Only
 *   consulted when the file name has no recognized extension.
 * @returns {string} One of 'image', 'document', 'spreadsheet',
 *   'presentation', 'archive' or 'unknown'.
 */
function categorizeFile(filename, contentType) {
  const types = {
    image: ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'],
    document: ['pdf', 'doc', 'docx', 'txt', 'rtf'],
    spreadsheet: ['xls', 'xlsx', 'csv'],
    presentation: ['ppt', 'pptx'],
    archive: ['zip', 'rar', '7z'],
  };

  // Only the text after the last dot is an extension. Without this check a
  // dot-less name such as "pdf" or "zip" would be mistaken for one.
  const name = String(filename ?? '');
  const dotIndex = name.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : name.slice(dotIndex + 1).toLowerCase();

  for (const [category, extensions] of Object.entries(types)) {
    if (extensions.includes(extension)) {
      return category;
    }
  }

  // Extension missing or unrecognized: fall back to the MIME type, ignoring
  // any parameters such as "; charset=utf-8".
  const mime = String(contentType ?? '').split(';')[0].trim().toLowerCase();
  if (mime.startsWith('image/')) {
    return 'image';
  }

  // A Map avoids accidental hits on Object.prototype keys like "constructor".
  const mimeCategories = new Map([
    ['application/pdf', 'document'],
    ['application/msword', 'document'],
    ['application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'document'],
    ['application/rtf', 'document'],
    ['text/plain', 'document'],
    ['application/vnd.ms-excel', 'spreadsheet'],
    ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'spreadsheet'],
    ['text/csv', 'spreadsheet'],
    ['application/vnd.ms-powerpoint', 'presentation'],
    ['application/vnd.openxmlformats-officedocument.presentationml.presentation', 'presentation'],
    ['application/zip', 'archive'],
    ['application/vnd.rar', 'archive'],
    ['application/x-rar-compressed', 'archive'],
    ['application/x-7z-compressed', 'archive'],
  ]);

  return mimeCategories.get(mime) ?? 'unknown';
}

3. Content Extraction#

PDF Processing#

// Extract text from PDF
async function extractPDFText(pdfContent) {
  const pdf2pic = require('pdf2pic');
  const fs = require('fs');

  // Save PDF temporarily
  const tempPath = `/tmp/${Date.now()}.pdf`;
  fs.writeFileSync(tempPath, Buffer.from(pdfContent, 'base64'));

  try {
    // Use PDF processing library
    const pdfParse = require('pdf-parse');
    const dataBuffer = fs.readFileSync(tempPath);
    const data = await pdfParse(dataBuffer);

    return {
      text: data.text,
      pages: data.numpages,
      info: data.info,
      metadata: data.metadata
    };
  } finally {
    // Clean up temp file
    fs.unlinkSync(tempPath);
  }
}

Image Processing#

// Extract text from images using OCR
async function extractImageText(imageContent, filename) {
  const vision = require('@google-cloud/vision');
  const client = new vision.ImageAnnotatorClient();

  // Convert base64 to buffer
  const imageBuffer = Buffer.from(imageContent, 'base64');

  try {
    const [result] = await client.textDetection(imageBuffer);
    const detections = result.textAnnotations;

    return {
      text: detections[0]?.description || '',
      confidence: detections[0]?.confidence || 0,
      language: detections[0]?.locale || 'unknown',
      detected_objects: result.localizedObjectAnnotations?.map(obj => obj.name) || []
    };
  } catch (error) {
    console.error('OCR Error:', error);
    return { text: '', error: error.message };
  }
}

Document Processing#

// Process Office documents
async function processDocument(content, filename) {
  const mammoth = require('mammoth'); // For .docx files
  const xlsx = require('xlsx'); // For Excel files

  const extension = filename.split('.').pop().toLowerCase();

  try {
    switch (extension) {
      case 'docx':
        const docxResult = await mammoth.extractRawText({ buffer: Buffer.from(content, 'base64') });
        return { text: docxResult.value, type: 'word_document' };

      case 'xlsx':
      case 'xls':
        const workbook = xlsx.read(Buffer.from(content, 'base64'), { type: 'buffer' });
        const sheetNames = workbook.SheetNames;
        const allText = sheetNames.map(name =>
          xlsx.utils.sheet_to_txt(workbook.Sheets[name])
        ).join('\n');
        return { text: allText, type: 'spreadsheet', sheets: sheetNames };

      case 'txt':
        return { text: Buffer.from(content, 'base64').toString('utf-8'), type: 'text' };

      default:
        return { text: '', type: 'unsupported', error: 'File type not supported' };
    }
  } catch (error) {
    return { text: '', type: 'error', error: error.message };
  }
}

4. AI Content Analysis#

Document Summarization#

/**
 * Ask the chat model for a summary, key points, classification, priority and
 * recommended action for a document.
 *
 * @param {string} text - Extracted document text; only the first 4000
 *   characters are sent.
 * @param {string} documentType - Label interpolated into the prompt
 *   (e.g. "invoice").
 * @returns {Promise<*>} Whatever `parseAIResponse` returns for the model's
 *   first choice.
 */
async function summarizeDocument(text, documentType) {
  // Cap the excerpt to avoid token limits. This note has to stay outside the
  // template literal: inside it, it would be sent to the model as prompt text.
  const excerpt = text.substring(0, 4000);

  const prompt = `
  Analyze this ${documentType} and provide:
  1. A concise summary (2-3 sentences)
  2. Key points or findings
  3. Document type classification
  4. Priority level (low/medium/high)
  5. Recommended action

  Document content:
  ${excerpt}
  `;

  const response = await callOpenAI({
    model: "gpt-3.5-turbo",
    messages: [{ role: "user", content: prompt }],
    temperature: 0.3
  });

  return parseAIResponse(response.choices[0].message.content);
}

Entity Extraction#

/**
 * Ask the chat model to extract people, organizations, dates, amounts,
 * contact details, locations and action items from a text.
 *
 * @param {string} text - Text to analyze.
 * @param {number} [maxChars=4000] - Maximum number of characters of `text`
 *   included in the prompt, so long documents do not exceed the model's token
 *   limit. Anything beyond the cap is not analyzed; pass `Infinity` to send
 *   the full text.
 * @returns {Promise<{entities: *, confidence: string}>} `entities` is whatever
 *   `parseEntities` returns for the model's first choice. `confidence` is the
 *   fixed label 'high'; it is not derived from the response.
 */
async function extractEntities(text, maxChars = 4000) {
  const excerpt = text.substring(0, maxChars);

  const prompt = `
  Extract the following entities from this text:
  - Names of people and organizations
  - Dates and deadlines
  - Monetary amounts
  - Contact information (email, phone)
  - Locations and addresses
  - Action items or tasks

  Text to analyze:
  ${excerpt}
  `;

  const response = await callOpenAI({
    model: "gpt-3.5-turbo",
    messages: [{ role: "user", content: prompt }],
    temperature: 0.1
  });

  return {
    entities: parseEntities(response.choices[0].message.content),
    confidence: 'high'
  };
}

Sentiment Analysis#

/**
 * Ask the chat model to rate a document's sentiment, urgency, emotional tone
 * and professional level.
 *
 * @param {string} text - Document text; only the first 2000 characters are
 *   included in the prompt.
 * @returns {Promise<*>} Whatever `parseSentimentResponse` returns for the
 *   model's first choice.
 */
async function analyzeSentiment(text) {
  const excerpt = text.substring(0, 2000);

  const instructions = `
  Analyze the sentiment and tone of this document:
  - Overall sentiment (positive/negative/neutral)
  - Urgency level (urgent/normal/low priority)
  - Emotional tone (formal/informal/angry/pleased/etc.)
  - Professional level

  Document:
  ${excerpt}
  `;

  const request = {
    model: "gpt-3.5-turbo",
    messages: [{ role: "user", content: instructions }],
    temperature: 0.1,
  };

  const { choices } = await callOpenAI(request);
  return parseSentimentResponse(choices[0].message.content);
}

5. Document Routing and Storage#

Smart Routing Logic#

/**
 * Pick a destination, follow-up action and priority for an analyzed document.
 *
 * Rules are tried in declaration order (invoices, contracts, reports,
 * receipts) and the first whose condition is truthy wins.
 *
 * @param {object} analysis - Analysis result; the rules read `type`,
 *   `summary`, `entities.monetary_amounts` and `confidence`.
 * @returns {object} For a matched rule: `category`, every field of the rule
 *   (including its `condition` function) and a `metadata` object holding
 *   `processed_at` and the analysis confidence. Otherwise the 'general'
 *   fallback, which has no `metadata`.
 */
function routeDocument(analysis) {
  // Keyword helpers; both yield undefined when the inspected field is absent.
  const summaryMentions = (doc, keyword) => doc.summary?.toLowerCase().includes(keyword);
  const typeMentions = (doc, keyword) => doc.type?.includes(keyword);

  const routing = {
    invoices: {
      condition: (doc) =>
        doc.entities?.monetary_amounts?.length > 0 && typeMentions(doc, 'invoice'),
      destination: '/finance/invoices',
      action: 'process_payment',
      priority: 'high',
    },
    contracts: {
      condition: (doc) => summaryMentions(doc, 'contract') || summaryMentions(doc, 'agreement'),
      destination: '/legal/contracts',
      action: 'legal_review',
      priority: 'high',
    },
    reports: {
      condition: (doc) => typeMentions(doc, 'report') || summaryMentions(doc, 'analysis'),
      destination: '/reports',
      action: 'distribute',
      priority: 'medium',
    },
    receipts: {
      condition: (doc) => summaryMentions(doc, 'receipt') || typeMentions(doc, 'receipt'),
      destination: '/finance/receipts',
      action: 'expense_tracking',
      priority: 'medium',
    },
  };

  const firstMatch = Object.entries(routing).find(([, rule]) => rule.condition(analysis));

  if (!firstMatch) {
    return {
      category: 'general',
      destination: '/documents/general',
      action: 'manual_review',
      priority: 'low',
    };
  }

  const [category, matchedRule] = firstMatch;
  return {
    category,
    ...matchedRule,
    metadata: {
      processed_at: new Date().toISOString(),
      confidence: analysis.confidence,
    },
  };
}

Cloud Storage Organization#

// Organize files in cloud storage
async function organizeFileInCloudStorage(file, routing, content) {
  const { Storage } = require('@google-cloud/storage');
  const storage = new Storage();
  const bucket = storage.bucket('your-document-bucket');

  // Create folder structure
  const folderPath = `${routing.destination}/${new Date().getFullYear()}/${new Date().getMonth() + 1}`;
  const fileName = `${Date.now()}-${file.filename}`;
  const fullPath = `${folderPath}/${fileName}`;

  // Upload file
  await bucket.file(fullPath).save(Buffer.from(file.content, 'base64'), {
    metadata: {
      contentType: file.type,
      metadata: {
        originalEmail: $json.subject,
        sender: $json.from.email,
        processedAt: new Date().toISOString(),
        category: routing.category,
        summary: content.summary,
        priority: routing.priority
      }
    }
  });

  // Make file publicly accessible if needed
  await bucket.file(fullPath).makePublic();

  return {
    url: `https://storage.googleapis.com/your-document-bucket/${fullPath}`,
    path: fullPath,
    size: file.size
  };
}

📊 Advanced Features#

Multi-language Support#

Language Detection

// Detect document language
async function detectLanguage(text) {
  const { Translate } = require('@google-cloud/translate').v2;
  const translate = new Translate();

  const [detection] = await translate.detect(text);
  return {
    language: detection.language,
    confidence: detection.confidence
  };
}

Translation Services

/**
 * Translate text through the chat model.
 *
 * @param {string} text - Text to translate; sent as the user message.
 * @param {string} targetLanguage - Language name interpolated into the system
 *   instruction.
 * @returns {Promise<*>} The content of the model's first choice.
 */
async function translateContent(text, targetLanguage) {
  const systemMessage = {
    role: "system",
    content: `Translate the following text to ${targetLanguage}. Maintain the original meaning and tone.`,
  };
  const userMessage = { role: "user", content: text };

  const { choices } = await callOpenAI({
    model: "gpt-3.5-turbo",
    messages: [systemMessage, userMessage],
    temperature: 0.3,
  });

  return choices[0].message.content;
}

Advanced AI Analysis#

Document Comparison

/**
 * Ask the chat model how two documents relate, based on their summaries.
 *
 * @param {object} doc1 - First document; only `summary` is read.
 * @param {object} doc2 - Second document; only `summary` is read.
 * @returns {Promise<*>} Whatever `parseComparisonResponse` returns for the
 *   model's first choice.
 */
async function compareDocuments(doc1, doc2) {
  const firstSummary = doc1.summary;
  const secondSummary = doc2.summary;

  const instructions = `
  Compare these two documents and identify:
  1. Similarities in content and structure
  2. Key differences
  3. Relationship between documents (version, related, unrelated)
  4. Recommendations for handling

  Document 1: ${firstSummary}
  Document 2: ${secondSummary}
  `;

  const request = {
    model: "gpt-3.5-turbo",
    messages: [{ role: "user", content: instructions }],
    temperature: 0.2,
  };

  const { choices } = await callOpenAI(request);
  return parseComparisonResponse(choices[0].message.content);
}

Document Validation

/**
 * Check whether a document's text mentions every field expected for its type.
 *
 * This is a case-insensitive substring check on the required field names
 * only; it does not verify authenticity.
 *
 * @param {{text: string}} content - Extracted document content.
 * @param {string} expectedType - 'invoice', 'contract' or 'report'. Any other
 *   value has no rules.
 * @returns {Promise<{is_valid: boolean, missing_fields: string[],
 *   confidence_score: number}>} `confidence_score` is the fraction of required
 *   fields found. For a type without rules nothing is reported missing, but
 *   the score is 0 because nothing was actually checked.
 */
async function validateDocument(content, expectedType) {
  // A Map keeps lookups from hitting Object.prototype keys such as "constructor".
  const validationRules = new Map([
    ['invoice', ['invoice number', 'date', 'amount', 'vendor details']],
    ['contract', ['signatures', 'dates', 'terms', 'parties']],
    ['report', ['title', 'date', 'author', 'content']],
  ]);

  const requiredFields = validationRules.get(expectedType) ?? [];
  const searchableText = String(content?.text ?? '').toLowerCase();
  const missingFields = requiredFields.filter(
    (field) => !searchableText.includes(field.toLowerCase())
  );

  // Guard the division: with no rules the ratio would be 0 / 0 = NaN.
  const confidenceScore =
    requiredFields.length === 0 ? 0 : 1 - missingFields.length / requiredFields.length;

  return {
    is_valid: missingFields.length === 0,
    missing_fields: missingFields,
    confidence_score: confidenceScore,
  };
}

🧪 Testing and Quality Assurance#

Test Document Set#

Create Test Documents

// Fixture documents for the pipeline test. Each row is
// [type, filename, expected entities, expected routing category].
const testDocuments = [
  ['invoice', 'test-invoice.pdf', ['amount', 'invoice_number', 'date'], 'invoices'],
  ['contract', 'test-contract.docx', ['signatures', 'dates', 'terms'], 'contracts'],
].map(([type, filename, expected_entities, expected_category]) => ({
  type,
  filename,
  expected_entities,
  expected_category,
}));

Automated Testing

/**
 * Run one fixture through extraction, analysis and routing, and report
 * whether it was routed to the expected category.
 *
 * Relies on `processDocument`, `analyzeDocument` and `routeDocument` being in
 * scope.
 *
 * @param {object} testDoc - Fixture; `content`, `filename` and
 *   `expected_category` are read.
 * @returns {Promise<object>} A result with `test_name`, `success` and
 *   `processing_time` (milliseconds), plus `routing_result` and
 *   `expected_result` when the pipeline completed, or `error` when a step
 *   threw.
 */
async function testDocumentProcessing(testDoc) {
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;

  let report;
  try {
    const extracted = await processDocument(testDoc.content, testDoc.filename);
    const analysed = await analyzeDocument(extracted);
    const { category } = routeDocument(analysed);

    report = {
      test_name: testDoc.filename,
      success: category === testDoc.expected_category,
      processing_time: elapsedMs(),
      routing_result: category,
      expected_result: testDoc.expected_category,
    };
  } catch (failure) {
    report = {
      test_name: testDoc.filename,
      success: false,
      error: failure.message,
      processing_time: elapsedMs(),
    };
  }

  return report;
}

🔍 Troubleshooting#

Common Issues#

File Processing Errors

- Unsupported file formats
- Corrupted or password-protected files
- Large file timeouts
- Encoding issues

AI Analysis Problems

- API rate limits
- Inaccurate classifications
- Token limit exceeded
- Context window issues

Storage and Routing

- Permission errors
- Storage quota exceeded
- Incorrect folder paths
- Network connectivity issues

Debug Tools#

Detailed Logging

// Emit one structured log entry for the document being processed.
// `file` must already be in scope where this snippet is pasted.
const documentLogEntry = {
  filename: file.filename,
  size: file.size,
  type: file.type,
  timestamp: new Date().toISOString(),
};

console.log('Processing document:', documentLogEntry);

Error Recovery

/**
 * Send a single-message chat request, retrying failed calls with exponential
 * backoff.
 *
 * @param {string} prompt - Content of the user message.
 * @param {number} [maxRetries=3] - Total number of attempts. Values below 1
 *   (or non-numeric values) are treated as 1, so the request is always tried
 *   at least once.
 * @param {number} [baseDelayMs=1000] - Backoff unit; the wait after attempt
 *   `k` is `2^k * baseDelayMs` (2 s, 4 s, ... with the default).
 * @returns {Promise<*>} The `callOpenAI` result of the first successful attempt.
 * @throws The error of the last attempt once all attempts have failed.
 */
async function callAIWithRetry(prompt, maxRetries = 3, baseDelayMs = 1000) {
  const attempts = Math.max(1, Math.floor(Number(maxRetries)) || 1);

  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      // Pass the model explicitly, like every other request in this guide.
      return await callOpenAI({
        model: "gpt-3.5-turbo",
        messages: [{ role: "user", content: prompt }]
      });
    } catch (error) {
      if (attempt === attempts) throw error;

      // Exponential backoff
      await new Promise((resolve) => setTimeout(resolve, 2 ** attempt * baseDelayMs));
    }
  }
}

📈 Performance Optimization#

Caching Strategies#

Analysis Caching

// In-memory cache of AI analysis results, keyed by document hash.
const cache = new Map();

/**
 * Return the analysis for a document, running `analyzeDocument` only on a
 * cache miss.
 *
 * @param {string} documentHash - Cache key identifying the document.
 * @param {*} [content] - Document content handed to `analyzeDocument` when
 *   the hash is not cached yet. Not read on a cache hit.
 * @returns {Promise<*>} The cached or freshly computed analysis.
 */
async function getCachedAnalysis(documentHash, content) {
  if (cache.has(documentHash)) {
    return cache.get(documentHash);
  }

  const analysis = await analyzeDocument(content);
  cache.set(documentHash, analysis);
  return analysis;
}

Batch Processing

/**
 * Process documents in fixed-size batches: documents within a batch run in
 * parallel, batches run one after another.
 *
 * @param {Array<{content: string, filename: string}>} documents - Documents
 *   to process; each is passed to `processDocument(content, filename)`.
 * @param {number} [batchSize=5] - Maximum number of documents processed
 *   concurrently. Values below 1 (or non-numeric values) are treated as 1.
 * @returns {Promise<Array>} The `processDocument` results, in input order.
 */
async function processBatch(documents, batchSize = 5) {
  // A step of 0 or less would never advance the loop.
  const step = Math.max(1, Math.floor(Number(batchSize)) || 1);
  const results = [];

  for (let i = 0; i < documents.length; i += step) {
    const batch = documents.slice(i, i + step);
    const batchResults = await Promise.all(
      // processDocument takes (content, filename), not the document object.
      batch.map((doc) => processDocument(doc.content, doc.filename))
    );
    results.push(...batchResults);
  }

  return results;
}

🛡️ Security and Compliance#

Data Privacy#

Sensitive Data Redaction

/**
 * Replace card numbers, US Social Security numbers and email addresses with
 * '[REDACTED]' before text is sent to an AI service.
 *
 * @param {string} text - Text to scrub.
 * @returns {string} The text with every match replaced by '[REDACTED]'.
 */
function redactSensitiveData(text) {
  const sensitivePatterns = [
    /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/g, // 16-digit card numbers, optionally grouped by four
    /\b\d{3}-\d{2}-\d{4}\b/g, // SSN
    // Inside a character class "|" is a literal, so the TLD class is
    // [A-Za-z]; [A-Z|a-z] would also accept a pipe character.
    /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, // Email
  ];

  return sensitivePatterns.reduce(
    (redactedText, pattern) => redactedText.replace(pattern, '[REDACTED]'),
    text
  );
}

Access Control

- Implement role-based access
- Secure API keys and credentials
- Audit document access logs
- Comply with GDPR and HIPAA where applicable

Related Tutorials:

- Form Submission - Basic form handling
- Email Automation - Email integration guide

Resources:

- n8n Email Nodes
- OpenAI Vision API
- Google Cloud Vision API