You can scrape tables with Cheerio by combining regular CSS selectors with the `find` and `each` methods to iterate over the rows and cells of the table.
Here's some sample code that demonstrates how to scrape a simple HTML table using Cheerio:
const cheerio = require('cheerio');
// Sample HTML: a table whose first row holds the column headers and whose
// remaining rows hold the data.
const html = `
<table>
<tr>
<th>Name</th>
<th>Age</th>
<th>Occupation</th>
</tr>
<tr>
<td>Yasoob</td>
<td>35</td>
<td>Software Engineer</td>
</tr>
<tr>
<td>Pierre</td>
<td>28</td>
<td>Product Manager</td>
</tr>
</table>
`;

// Load the HTML content into a Cheerio object
const $ = cheerio.load(html);

// Select the table element
const table = $('table');

// Column names, filled in from the first row of the table
const headers = [];

// One object per data row, keyed by column name
const tableData = [];

// Iterate over each row of the table using the find and each methods
table.find('tr').each((i, row) => {
  const cells = $(row).find('td, th');

  // The first row supplies the column names rather than a data record.
  if (i === 0) {
    cells.each((j, cell) => {
      headers[j] = $(cell).text().trim();
    });
    return;
  }

  // Map each cell's text to its column name. Keying by header (instead of by
  // cell text) keeps the actual values and avoids collisions when two cells
  // in the same row contain identical text.
  const rowData = {};
  cells.each((j, cell) => {
    // Fall back to a positional key if a row has more cells than headers.
    const key = j < headers.length ? headers[j] : `column${j}`;
    rowData[key] = $(cell).text().trim();
  });

  // Add the row data to the table data array
  tableData.push(rowData);
});

// Print the table data
console.log(tableData);

// Output:
// [
//   { Name: 'Yasoob', Age: '35', Occupation: 'Software Engineer' },
//   { Name: 'Pierre', Age: '28', Occupation: 'Product Manager' }
// ]