Phiên bản: 2.0 · Ngày cập nhật: 2025 · Kiến trúc: Flow-Based Architecture · Tác giả: Development Team
WP-CrawlFlow 2.0 là WordPress plugin mạnh mẽ cho data migration và web crawling sử dụng Flow-Based Architecture, cung cấp:
┌─────────────────────────────────────────────────────────────┐
│ WP-CRAWLFLOW PLUGIN │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────┐ │
│ │ DASHBOARD │ │ MIGRATION │ │ CRAWL │ │
│ │ KERNEL │ │ SYSTEM │ │ ENGINE │ │
│ │ │ │ │ │ │ │
│ │ • Screen Detect │ │ • Schema Update │ │ • URL Fetch │ │
│ │ • Data Loading │ │ • Version Track │ │ • Data Parse│ │
│ │ • View Render │ │ • Auto Migrate │ │ • Store Data│ │
│ │ • Admin UI │ │ • Rollback │ │ • Queue Mgmt│ │
│ └─────────────────┘ └─────────────────┘ └─────────────┘ │
│ │
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────┐ │
│ │ FLOW COMPOSER │ │ LOGGER │ │ PROJECT │ │
│ │ (REACT) │ │ SYSTEM │ │ MANAGEMENT │ │
│ │ │ │ │ │ │ │
│ │ • Visual Editor │ │ • Lazy Loading │ │ • CRUD Ops │ │
│ │ • Flow Builder │ │ • Daily Logs │ │ • Settings │ │
│ │ • Schema Design │ │ • Error Track │ │ • Analytics │ │
│ │ • Data Preview │ │ • CLI Support │ │ • Export │ │
│ └─────────────────┘ └─────────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────┘
WP-CrawlFlow được thiết kế để giải quyết các vấn đề phức tạp trong data processing và web crawling:
┌─────────────────────────────────────────────────────────────┐
│ CRAWLFLOW ECOSYSTEM │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────┐ │
│ │ WP-CRAWLFLOW │ │ CRAWLFLOW CLI │ │ CRAWLFLOW│ │
│ │ PLUGIN │ │ TOOL │ │ CORE │ │
│ │ │ │ │ │ │ │
│ │ • WordPress UI │ │ • Command Line │ │ • Engine│ │
│ │ • Visual Editor │ │ • Batch Process │ │ • API │ │
│ │ • Admin Panel │ │ • Scripts │ │ • Core │ │
│ └─────────────────┘ └─────────────────┘ └─────────┘ │
│ │
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────┐ │
│ │ CRAWLFLOW │ │ CRAWLFLOW │ │ CRAWLFLOW│ │
│ │ DASHBOARD │ │ ANALYTICS │ │ QUEUE │ │
│ │ │ │ │ │ │ │
│ │ • Real-time │ │ • Data Insights │ │ • Jobs │ │
│ │ • Monitoring │ │ • Reports │ │ • Tasks │ │
│ │ • Alerts │ │ • Charts │ │ • Queue │ │
│ └─────────────────┘ └─────────────────┘ └─────────┘ │
└─────────────────────────────────────────────────────────────┘
// Instead of writing the fetch / parse / store code by hand
$data = file_get_contents($url);
$parsed = parseData($data);
saveToDatabase($parsed);
// Use the visual flow composer
// Drag & drop the components
// Auto-generate the code
// Integrates seamlessly with WordPress
add_action('wp_ajax_crawlflow_save_project', [$this, 'handleSaveProject']);
add_action('admin_menu', [$this, 'registerMenu']);
add_action('wp_loaded', [$this, 'initialize']);
// React-based visual composer
// NOTE(review): onNodesChange / onEdgesChange are not defined in this snippet —
// presumably declared elsewhere in the real component; confirm.
const ProjectComposer = () => {
// Graph state: both the node list and the edge list start empty.
const [nodes, setNodes] = useState([]);
const [edges, setEdges] = useState([]);
return (
<ReactFlow
nodes={nodes}
edges={edges}
onNodesChange={onNodesChange}
onEdgesChange={onEdgesChange}
/>
);
};
// Run the database migrations automatically.
// MigrationService's constructor takes the Rake application container.
$migrationService = new MigrationService($app);
$result = $migrationService->runMigrations();
// Version tracking
$version = $migrationService->getCurrentVersion();
┌─────────────────┐ depends on ┌─────────────────┐ depends on ┌─────────────────┐
│ WP-CRAWLFLOW │ ────────────────▶ │ RAKE WORDPRESS │ ────────────────▶ │ RAKE CORE │
│ PLUGIN │ │ ADAPTER │ │ FRAMEWORK │
└─────────────────┘ └─────────────────┘ └─────────────────┘
│ │ │
│ uses │ uses │ uses
▼ ▼ ▼
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ WORDPRESS │ │ WORDPRESS │ │ PHP 8.1+ │
│ ENVIRONMENT │ │ DATABASE │ │ COMPOSER │
└─────────────────┘ └─────────────────┘ └─────────────────┘
{
"name": "crawlflow/crawlflow",
"require": {
"php": ">=8.1",
"ramphor/rake": "^2.0",
"puleeno/rake-wordpress-adapter": "^2.0"
},
"autoload": {
"psr-4": {
"CrawlFlow\\": "src/"
}
}
}
// WP-CrawlFlow sử dụng Rake Core
use Rake\Rake;
use Rake\Facade\Logger;
use Rake\Manager\Database\MigrationManager;
// WP-CrawlFlow sử dụng Rake WordPress Adapter
use Rake\WordPress\Database\WordPressDatabaseAdapter;
use Rake\WordPress\Hooks\WordPressHooksAdapter;
use Rake\WordPress\Admin\WordPressAdminAdapter;
// Service registration
$app = new Rake();
$app->singleton(DatabaseAdapterInterface::class, WordPressDatabaseAdapter::class);
$app->singleton(WordPressHooksInterface::class, WordPressHooksAdapter::class);
$app->singleton(WordPressAdminInterface::class, WordPressAdminAdapter::class);
wp-crawlflow/
├── src/
│ ├── Kernel/ # Rake Kernel implementations
│ │ ├── CrawlFlowDashboardKernel.php
│ │ ├── CrawlFlowMigrationKernel.php
│ │ └── CrawlFlowConsoleKernel.php
│ ├── Admin/ # WordPress Admin integration
│ │ ├── CrawlFlowController.php
│ │ ├── DashboardService.php
│ │ ├── ProjectService.php
│ │ ├── MigrationService.php
│ │ ├── LogService.php
│ │ └── DashboardRenderer.php
│ ├── Bootstrapper/ # Rake Bootstrapper implementations
│ │ ├── CrawlFlowDashboardBootstrapper.php
│ │ ├── CrawlFlowMigrationBootstrapper.php
│ │ └── CrawlFlowCoreBootstrapper.php
│ ├── ServiceProvider/ # Rake Service Provider implementations
│ │ ├── CrawlFlowDashboardServiceProvider.php
│ │ ├── CrawlFlowMigrationServiceProvider.php
│ │ └── CrawlFlowCoreServiceProvider.php
│ └── Logger/ # Logging system
│ └── CrawlFlowLogger.php
├── assets/
│ ├── css/
│ │ ├── admin.css # Admin styles
│ │ └── composer.css # Flow composer styles
│ └── js/
│ ├── admin.js # Admin JavaScript
│ └── composer-simple.js # React flow composer
├── vendor/ # Composer dependencies
├── wp-crawlflow.php # Main plugin file
├── composer.json
└── README.md
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ WORDPRESS │ │ WP-CRAWLFLOW │ │ RAKE CORE │
│ ADMIN │ │ PLUGIN │ │ FRAMEWORK │
│ │ │ │ │ │
│ • Menu Pages │───▶│ • Dashboard │───▶│ • Container │
│ • AJAX Actions │ │ • Migration │ │ • Kernel │
│ • Admin Scripts │ │ • Flow Composer │ │ • Services │
│ • Admin Styles │ │ • Logger │ │ • Facades │
└─────────────────┘ └─────────────────┘ └─────────────────┘
│ │ │
│ │ │
▼ ▼ ▼
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ WORDPRESS │ │ RAKE WORDPRESS│ │ PHP/COMPOSER │
│ DATABASE │ │ ADAPTER │ │ ENVIRONMENT │
│ │ │ │ │ │
│ • wp_posts │ │ • Database │ │ • Autoloader │
│ • wp_options │ │ • Hooks │ │ • Dependencies │
│ • Custom Tables │ │ • Admin │ │ • Extensions │
│ • Migrations │ │ • Security │ │ • Configuration │
└─────────────────┘ └─────────────────┘ └─────────────────┘
# Upload to WordPress plugins directory
wp-content/plugins/wp-crawlflow/
# Activate plugin trong WordPress admin
# Plugin sẽ tự động run migrations
composer create-project crawlflow/crawlflow wp-content/plugins/crawlflow
// In wp-crawlflow.php
/**
 * Plugin entry point: builds the Rake container, registers the CrawlFlow
 * service providers on it and wires the admin controller's hooks.
 */
class WP_CrawlFlow
{
    /** Rake application container used by the plugin. */
    private Rake $app;

    /** Controller whose hooks are registered during construction. */
    private CrawlFlowController $controller;

    public function __construct()
    {
        // Initialize Rake container
        $this->app = new Rake();

        // Register service providers, in this order: core, dashboard, migration.
        $providerClasses = [
            CrawlFlowCoreServiceProvider::class,
            CrawlFlowDashboardServiceProvider::class,
            CrawlFlowMigrationServiceProvider::class,
        ];
        foreach ($providerClasses as $providerClass) {
            $this->app->register(new $providerClass());
        }

        // Initialize controller and let it attach its hooks
        $this->controller = new CrawlFlowController($this->app);
        $this->controller->registerHooks();
    }
}
WordPress Admin → CrawlFlow → Projects
// Using ProjectService
$projectService = new ProjectService();
// NOTE(review): 'settings' is a nested array, and createProject() as documented
// passes $data straight to the adapter's insert() — confirm the adapter serializes it.
$project = $projectService->createProject([
'name' => 'My Crawl Project',
'description' => 'Crawl data from website',
'settings' => [
'url' => 'https://example.com',
'selectors' => ['h1', 'h2', '.content'],
'output_format' => 'json'
]
]);
// React-based flow composer
const ProjectComposer = () => {
const [nodes, setNodes] = useState([
{
id: '1',
type: 'input',
data: { label: 'Start' },
position: { x: 0, y: 0 }
},
{
id: '2',
type: 'crawl',
data: { label: 'Crawl URL' },
position: { x: 200, y: 0 }
}
]);
return (
<ReactFlow
nodes={nodes}
edges={edges}
onNodesChange={onNodesChange}
onEdgesChange={onEdgesChange}
/>
);
};
// The plugin runs migrations automatically on activation
$migrationService = new MigrationService($app);
$result = $migrationService->runMigrations();
if ($result['success']) {
Logger::info('Migrations completed successfully');
} else {
// NOTE(review): runMigrations() as documented already logs the failure itself,
// so this may record the same error twice — confirm that is intended.
Logger::error('Migration failed: ' . $result['error']);
}
// Run migrations manually
$kernel = new CrawlFlowMigrationKernel($app);
$kernel->runMigrations();
// Check migration status
$status = $kernel->checkMigrationStatus();
echo "Current version: " . $status['current_version'];
use Rake\Facade\Logger;
// The logger is only initialized when it is needed
Logger::info('Starting crawl process');
Logger::error('Crawl failed', ['url' => $url, 'error' => $error]);
Logger::debug('Processing data', ['count' => count($data)]);
wp-content/crawlflow/
├── crawlflow-2025-01-15.log
├── crawlflow-2025-01-16.log
└── crawlflow-2025-01-17.log
// JavaScript
// Posts the project to WordPress' AJAX endpoint; `action` selects the
// wp_ajax_crawlflow_save_project hook and `nonce` is verified by the PHP handler.
jQuery.post(ajaxurl, {
action: 'crawlflow_save_project',
nonce: crawlflowAdmin.nonce,
project: projectData
}, function(response) {
// NOTE(review): only the success case is handled; a failed save gives the user no feedback.
if (response.success) {
alert('Project saved successfully');
}
});
// PHP Handler
/**
 * AJAX handler for the `crawlflow_save_project` action.
 *
 * Verifies the nonce and the caller's capability, validates that the posted
 * `project` value is an array, creates the project and answers with a JSON
 * success/error envelope. The wp_send_json_*() helpers and wp_die() end the
 * request, so nothing runs after them.
 */
public function handleSaveProject()
{
    // A missing nonce must fail verification instead of raising an
    // "undefined array key" warning.
    $nonce = isset($_POST['nonce']) ? \sanitize_text_field(\wp_unslash($_POST['nonce'])) : '';
    if (!\wp_verify_nonce($nonce, 'crawlflow_save_project')) {
        \wp_die('Security check failed');
    }

    // The admin pages are registered with `manage_options`; the AJAX endpoint
    // must not be reachable with less than that.
    if (!\current_user_can('manage_options')) {
        \wp_send_json_error('Insufficient permissions', 403);
    }

    // createProject() is typed `array`; reject anything else up front rather
    // than letting a TypeError escape.
    $project = $_POST['project'] ?? null;
    if (!\is_array($project)) {
        \wp_send_json_error('Invalid project data', 400);
    }

    $projectService = new ProjectService();
    // WordPress adds slashes to $_POST; remove them before the data is stored.
    $result = $projectService->createProject(\wp_unslash($project));
    if ($result) {
        \wp_send_json_success(['id' => $result]);
    } else {
        \wp_send_json_error('Failed to save project');
    }
}
// Register admin menu
/**
 * Adds the top-level "CrawlFlow" admin menu plus its "Projects" and "Logs"
 * sub-pages. "Projects" reuses the parent slug, so it is the page shown when
 * the top-level entry is opened.
 */
public function registerMenu()
{
    $capability = 'manage_options';
    $parentSlug = 'crawlflow';
    $renderProjects = [$this, 'renderProjectsPage'];

    // Top-level entry (networking dashicon, menu position 30).
    add_menu_page('CrawlFlow', 'CrawlFlow', $capability, $parentSlug, $renderProjects, 'dashicons-networking', 30);

    // Sub-pages under the CrawlFlow menu.
    add_submenu_page($parentSlug, 'Projects', 'Projects', $capability, $parentSlug, $renderProjects);
    add_submenu_page($parentSlug, 'Logs', 'Logs', $capability, 'crawlflow-logs', [$this, 'renderLogsPage']);
}
📖 docs/technical-documentation.md
Nội dung:
/**
 * Dashboard kernel: on construction it creates the dashboard service and the
 * admin controller, then detects the current screen and loads its data.
 * Rendering is delegated to the controller.
 */
class CrawlFlowDashboardKernel extends AbstractKernel
{
    /** Service created in the constructor. */
    private DashboardService $dashboardService;

    /** Controller that renders the page. */
    private CrawlFlowController $controller;

    /**
     * @param Rake $app Application container, forwarded to the parent kernel and to the controller.
     */
    public function __construct(Rake $app)
    {
        parent::__construct($app);

        $this->dashboardService = new DashboardService();
        $this->controller = new CrawlFlowController($app);

        // Screen detection runs before the data load, in that order.
        $this->detectCurrentScreen();
        $this->loadScreenData();
    }

    /** Renders the current admin page through the controller. */
    public function render(): void
    {
        $this->controller->renderPage();
    }
}
/**
 * Creates the plugin's database tables from the schema definitions found
 * under the container's `migration_schema_path`.
 *
 * getSchemaDefinitions() and createTable() are not shown in this excerpt.
 */
class MigrationService
{
    /** Application container; read for the `migration_schema_path` entry. */
    private Rake $app;

    /** WordPress database adapter created in the constructor. */
    private WordPressDatabaseAdapter $adapter;

    /**
     * @param Rake $app Application container.
     */
    public function __construct(Rake $app)
    {
        $this->app = $app;
        $this->adapter = new WordPressDatabaseAdapter();
    }

    /**
     * Creates every table described by the schema definitions.
     *
     * Failures are logged and reported through the return value instead of
     * being thrown to the caller.
     *
     * @return array{success: bool, error?: string}
     */
    public function runMigrations(): array
    {
        try {
            $schemaPath = $this->app->get('migration_schema_path');
            $definitions = $this->getSchemaDefinitions($schemaPath);
            foreach ($definitions as $table => $schema) {
                $this->createTable($table, $schema);
            }

            return ['success' => true];
        } catch (\Exception $e) {
            // Fully qualified on purpose: inside the plugin's namespace a bare
            // `Exception` would resolve to a non-existent class and never match.
            Logger::error('Migration failed: ' . $e->getMessage());

            return ['success' => false, 'error' => $e->getMessage()];
        }
    }
}
/**
 * CRUD access to the `crawlflow_projects` table through the WordPress
 * database adapter.
 */
class ProjectService
{
    /** Adapter used for every query issued by this service. */
    private WordPressDatabaseAdapter $adapter;

    /**
     * @param WordPressDatabaseAdapter|null $adapter Adapter to use; a default
     *        instance is created when omitted, so `new ProjectService()` works.
     */
    public function __construct(?WordPressDatabaseAdapter $adapter = null)
    {
        // The typed property must be initialised before use; reading it
        // uninitialised throws an Error.
        $this->adapter = $adapter ?? new WordPressDatabaseAdapter();
    }

    /**
     * Inserts a project row, stamping `created_at` and `updated_at`.
     *
     * @param array $data Column => value map for the new project.
     * @return int Value returned by the adapter's insert() (presumably the new row id — confirm).
     */
    public function createProject(array $data): int
    {
        // Read the clock once so both timestamps are identical.
        $now = \current_time('mysql');
        $data['created_at'] = $now;
        $data['updated_at'] = $now;

        return $this->adapter->insert('crawlflow_projects', $data);
    }

    /**
     * Returns all projects, newest first.
     *
     * @return array Result rows as returned by the adapter.
     */
    public function getProjects(): array
    {
        return $this->adapter->getResults("
            SELECT * FROM {$this->adapter->getPrefix()}crawlflow_projects
            ORDER BY created_at DESC
        ");
    }
}
// Always use WordPress functions with backslash prefix
$result = \wp_verify_nonce($nonce, $action);
// Use WordPress security functions
$sanitized = \sanitize_text_field($input);
// Check capabilities before actions
if (\current_user_can('manage_options')) {
// Perform admin action
}
// Use WordPress hooks properly
\add_action('init', [$this, 'initialize']);
// Use Rake Facades
use Rake\Facade\Logger;
Logger::info('Operation started');
Logger::error('Operation failed', ['context' => $data]);
// Use Rake Container
$app = new Rake();
$service = $app->make(ServiceInterface::class);
// Use Rake Database Adapter
$adapter = new WordPressDatabaseAdapter();
$result = $adapter->query('SELECT * FROM table');
// Unit-test example for CrawlFlowController.
// NOTE(review): handleSaveProject() as documented in this guide takes no
// arguments, reads $_POST and answers via wp_send_json_*() (which ends the
// request) — it does not return an array. This test will not pass against that
// handler as written; confirm the intended contract or drive it through an
// AJAX-aware test case.
class CrawlFlowControllerTest extends TestCase
{
private CrawlFlowController $controller;
// Builds a fresh container and controller for every test.
protected function setUp(): void
{
$app = new Rake();
$this->controller = new CrawlFlowController($app);
}
public function testSaveProject(): void
{
// Arrange
$projectData = [
'name' => 'Test Project',
'description' => 'Test Description'
];
// Act
$result = $this->controller->handleSaveProject($projectData);
// Assert
$this->assertTrue($result['success']);
}
}
// Integration-test example: renders the dashboard kernel and inspects its output.
class CrawlFlowIntegrationTest extends TestCase
{
public function testDashboardRendering(): void
{
// Arrange
$app = new Rake();
$kernel = new CrawlFlowDashboardKernel($app);
// Act
// render() echoes its output, so capture it with an output buffer.
ob_start();
$kernel->render();
$output = ob_get_clean();
// Assert
$this->assertStringContainsString('CrawlFlow', $output);
}
}
/**
 * Base exception for CrawlFlow failures.
 *
 * The message is prefixed with "CrawlFlow error: " and the structured context
 * passed by the thrower is kept so handlers can log it.
 */
class CrawlFlowException extends \Exception
{
    /** @var array Context supplied by the thrower (e.g. url, project id). */
    private array $context;

    /**
     * @param string          $message  Human-readable description (without the prefix).
     * @param array           $context  Extra data describing the failure.
     * @param int             $code     Exception code.
     * @param \Throwable|null $previous Underlying cause, if any.
     */
    public function __construct(string $message, array $context = [], int $code = 0, ?\Throwable $previous = null)
    {
        parent::__construct("CrawlFlow error: {$message}", $code, $previous);
        $this->context = $context;
    }

    /**
     * Returns the context passed to the constructor.
     *
     * @return array
     */
    public function getContext(): array
    {
        return $this->context;
    }
}
// Usage
// NOTE(review): runMigrations() as documented catches Exception internally and
// reports failure through its return value, so this catch only fires for a
// CrawlFlowException thrown outside that internal try block — confirm.
try {
$migrationService = new MigrationService($app);
$result = $migrationService->runMigrations();
} catch (CrawlFlowException $e) {
Logger::error('CrawlFlow operation failed: ' . $e->getMessage());
}
Plugin tự động sử dụng WordPress database settings:
// Detected automatically from WordPress
$adapter = new WordPressDatabaseAdapter();
echo $adapter->getPrefix(); // wp_
echo $adapter->getCharset(); // utf8mb4
echo $adapter->getCollation(); // utf8mb4_unicode_ci
// Logger configuration
add_filter('crawlflow/logger', function($path) {
return '/custom/path/to/logs/crawlflow.log';
});
// Migration configuration
add_filter('crawlflow/migration_schema_path', function($path) {
return '/custom/path/to/schemas/';
});
Class 'CrawlFlow\Admin\CrawlFlowController' not found — Solution:
composer dump-autoload
WordPress not loaded — Solution:
// Ensure WordPress is loaded
require_once 'wp-load.php';
Database migration failed — Solution:
// Check database permissions
// Verify WordPress database configuration
// Check migration schema files
// Enable debug mode
define('CRAWFLOW_DEBUG', true);
// Check logs
Logger::debug('Debug information');
Logger::error('Error information');
// Use transactions for multiple operations
$adapter->beginTransaction();
try {
foreach ($projects as $project) {
$adapter->insert('crawlflow_projects', $project);
}
$adapter->commit();
} catch (Exception $e) {
$adapter->rollback();
throw $e;
}
// Use batch operations
// Raw SQL needs the WordPress table prefix, as ProjectService::getProjects() does.
$projectsTable = $adapter->getPrefix() . 'crawlflow_projects';
$adapter->getResults("SELECT * FROM {$projectsTable} LIMIT 1000");
// Use specific columns
$adapter->getResults("SELECT id, name FROM {$projectsTable} WHERE status = 'active'");
WP-CrawlFlow cung cấp giải pháp hoàn chỉnh cho data migration và web crawling trong WordPress với:
// Initialize plugin
$plugin = new WP_CrawlFlow();
// Use dashboard
// WordPress Admin → CrawlFlow → Projects
// Use visual composer
// Projects → Add New → Visual Flow Composer
// Use migration
$migrationService = new MigrationService($app);
$result = $migrationService->runMigrations();
Tài liệu này sẽ được cập nhật thường xuyên khi có thay đổi trong plugin.