मैं C ++ में CSV फ़ाइलों को कैसे पढ़ और पार्स कर सकता हूं?

264

मुझे C++ में CSV फ़ाइल का डेटा लोड करके उपयोग करना है। फ़िलहाल यह वास्तव में सिर्फ़ एक अल्पविराम-सीमांकित पार्सर हो सकता है (यानी नई लाइनों और अल्पविरामों को एस्केप करने की चिंता न करें)। मुख्य आवश्यकता एक लाइन-दर-लाइन पार्सर है जो हर बार मेथड कॉल किए जाने पर अगली पंक्ति के लिए एक वेक्टर लौटाए।

मुझे यह लेख मिला जो काफी आशाजनक लग रहा है: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp

मैंने Boost.Spirit का कभी उपयोग नहीं किया है, लेकिन मैं इसे आज़माने के लिए तैयार हूँ — बशर्ते कोई और अधिक सीधा समाधान न हो जो मुझसे छूट रहा हो।

— User 1
स्रोत

11

मैंने पार्सिंग के लिए boost::spirit को देखा है। यह किसी सरल फ़ाइल प्रारूप को पार्स करने की बजाय व्याकरणों (grammars) को पार्स करने के लिए अधिक उपयुक्त है। मेरी टीम का कोई व्यक्ति XML को पार्स करने के लिए इसका उपयोग करने की कोशिश कर रहा था और इसे डिबग करना बहुत कष्टदायक था। हो सके तो boost::spirit से दूर रहें।

— 19

50

क्षमा करें, लेकिन यह भयानक सलाह है। आत्मा हमेशा एक उपयुक्त समाधान नहीं है, लेकिन मैंने इसका उपयोग किया है - और इसका उपयोग जारी है - सफलतापूर्वक कई परियोजनाओं में। इसी तरह के उपकरणों की तुलना में (एंट्ल, लेक्स / याक आदि) इसके महत्वपूर्ण फायदे हैं। CSV को पार्स करने के लिए, यह शायद ओवरकिल है ...

— MattyT

4

@ मट्टी IMHO spiritएक पार्सर कॉम्बीनेटर लाइब्रेरी के लिए उपयोग करना बहुत कठिन है। हास्कल्स (atto)parsecपुस्तकालयों के साथ कुछ (बहुत सुखद) अनुभव होने के बाद मुझे उम्मीद थी कि यह (आत्मा) समान रूप से अच्छी तरह से काम करेगी , लेकिन 600 लाइन कंपाइलर त्रुटियों के साथ लड़ने के बाद इसे छोड़ दिया।

— fho

1

सी CSV पार्सर: sourceforge.net/projects/cccsvparser सी CSV लेखक: sourceforge.net/projects/cccsvwriter

— कुछ समय

296

यदि आप कॉमा और न्यूलाइन से बचने के बारे में परवाह नहीं करते हैं,
और आप कॉमा और न्यूलाइन को उद्धरणों में एम्बेड नहीं कर सकते हैं (यदि आप तब बच नहीं सकते हैं ...)
तो कोड की केवल लगभग तीन पंक्तियाँ हैं (ठीक है, 14 — लेकिन पूरी फ़ाइल पढ़ने के लिए भी केवल 15)।

// Reads one line from `str` and splits it at every ',' into cells.
// Quotes and escapes are not interpreted. The result always holds exactly
// (number of commas + 1) cells, so an empty line gives one empty cell and a
// trailing comma gives a final empty cell.
std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
    std::string row;
    std::getline(str, row);

    std::vector<std::string> cells;
    std::string::size_type   cellStart = 0;
    for (;;)
    {
        const std::string::size_type comma = row.find(',', cellStart);
        if (comma == std::string::npos)
        {
            // Everything after the last comma (possibly nothing) is the final cell.
            cells.push_back(row.substr(cellStart));
            break;
        }
        cells.push_back(row.substr(cellStart, comma - cellStart));
        cellStart = comma + 1;
    }
    return cells;
}

मैं बस एक पंक्ति का प्रतिनिधित्व करने वाला एक वर्ग बनाऊंगा।
फिर उस ऑब्जेक्ट में स्ट्रीम करें:

#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>

// One row of a comma-separated file, stored as a list of cell strings.
// Quotes and escapes are not interpreted.
class CSVRow
{
public:
    // Returns the cell at `index`; no bounds checking is performed.
    std::string const& operator[](std::size_t index) const
    {
        return m_data[index];
    }

    // Number of cells read by the most recent readNextRow() call.
    std::size_t size() const
    {
        return m_data.size();
    }

    // Replaces the stored cells with the next line of `str`, split at ','.
    // The row always ends up with (number of commas + 1) cells, so an empty
    // line gives one empty cell and a trailing comma gives a final empty cell.
    void readNextRow(std::istream& str)
    {
        std::string text;
        std::getline(str, text);

        m_data.clear();
        std::string::size_type cellStart = 0;
        std::string::size_type comma     = text.find(',');
        while (comma != std::string::npos)
        {
            m_data.push_back(text.substr(cellStart, comma - cellStart));
            cellStart = comma + 1;
            comma     = text.find(',', cellStart);
        }
        // Whatever follows the last comma (possibly nothing) is the final cell.
        m_data.push_back(text.substr(cellStart));
    }

private:
    std::vector<std::string> m_data;
};

// Stream extraction for CSVRow: reads the next line of `str` into `data`
// and returns the stream so the call can be used as a loop condition.
// Declared inline so the definition can live in a header included from
// several translation units without causing duplicate-symbol link errors.
inline std::istream& operator>>(std::istream& str, CSVRow& data)
{
    data.readNextRow(str);
    return str;
}
// Prints the 4th cell of every row in "plop.csv".
int main()
{
    std::ifstream       file("plop.csv");

    CSVRow              row;
    while(file >> row)
    {
        // CSVRow::operator[] is unchecked, so skip rows that have fewer than
        // four cells instead of reading past the end of the row.
        if (row.size() > 3)
        {
            std::cout << "4th Element(" << row[3] << ")\n";
        }
    }
}

लेकिन थोड़े से काम के साथ हम तकनीकी रूप से एक इटरेटर (iterator) बना सकते हैं:

// Single-pass input iterator that yields one CSVRow per line of a stream.
// A default-constructed iterator is the end sentinel. An iterator bound to a
// stream reads its first row on construction and becomes equal to the end
// sentinel as soon as an extraction fails.
class CSVIterator
{
    public:
        typedef std::input_iterator_tag     iterator_category;
        typedef CSVRow                      value_type;
        typedef std::size_t                 difference_type;
        typedef CSVRow*                     pointer;
        typedef CSVRow&                     reference;

        // Binds to `str` (or becomes the end sentinel if the stream is not good)
        // and immediately reads the first row.
        CSVIterator(std::istream& str)  :m_str(str.good()?&str:NULL) { ++(*this); }
        // End-of-input sentinel.
        CSVIterator()                   :m_str(NULL) {}

        // Pre Increment: reads the next row; on failure detaches from the stream.
        CSVIterator& operator++()               {if (m_str) { if (!((*m_str) >> m_row)){m_str = NULL;}}return *this;}
        // Post increment: returns a copy holding the row that was current before advancing.
        CSVIterator operator++(int)             {CSVIterator    tmp(*this);++(*this);return tmp;}
        CSVRow const& operator*()   const       {return m_row;}
        CSVRow const* operator->()  const       {return &m_row;}

        // Two iterators are equal when they are the same object or both are at
        // the end. The comparisons are const so they also work on const iterators.
        bool operator==(CSVIterator const& rhs) const {return ((this == &rhs) || ((this->m_str == NULL) && (rhs.m_str == NULL)));}
        bool operator!=(CSVIterator const& rhs) const {return !((*this) == rhs);}
    private:
        std::istream*       m_str;   // NULL once the end of input has been reached
        CSVRow              m_row;   // most recently read row
};


// Prints the 4th cell of every row in "plop.csv" using CSVIterator.
int main()
{
    std::ifstream       file("plop.csv");

    for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
    {
        // Row indexing is unchecked, so skip rows that have fewer than four
        // cells instead of reading past the end of the row.
        if (loop->size() > 3)
        {
            std::cout << "4th Element(" << (*loop)[3] << ")\n";
        }
    }
}

— मार्टिन यॉर्क
स्रोत

20

पहला () अगला ()। यह जावा क्या है! केवल मजाक कर रहा है।

— मार्टिन

4

@DarthVader: एक व्यापक विवरण जो अपनी व्यापकता से मूर्खतापूर्ण है। यदि आप स्पष्ट करना चाहेंगे कि यह बुरा क्यों है और फिर इस संदर्भ में यह बुराता क्यों लागू होती है।

— मार्टिन

12

@DarthVader: मुझे लगता है कि ऐसे व्यापक सामान्यीकरण करना मूर्खतापूर्ण है। उपरोक्त कोड सही ढंग से काम करता है, इसलिए मुझे वास्तव में इसमें कुछ भी गलत नहीं दिखता। लेकिन अगर आपके पास उपरोक्त पर कोई विशिष्ट टिप्पणी है तो मैं इस संदर्भ में निश्चित रूप से उस पर विचार करूंगा। हाँ, मैं यह समझ सकता हूं कि C# के लिए बने सामान्यीकृत नियमों के एक सेट को बिना सोचे किसी अन्य भाषा पर लागू करके आप उस निष्कर्ष पर कैसे पहुँच सकते हैं।

— मार्टिन जॉर्ज

5

इसके अलावा, यदि उपरोक्त कोड के साथ आपको अजीब लिंकिंग समस्याएँ आती हैं क्योंकि कोई अन्य लाइब्रेरी (जैसे Eigen) कहीं istream::operator>> परिभाषित करती है, तो इसे ठीक करने के लिए ऑपरेटर घोषणा से पहले inline जोड़ें।

— sebastian_k

3

यह एक सरल और साफ-सुथरा उदाहरण है कि मैंने कभी देखा है कि इट्रेटर क्लास कैसे बनाया जाए।

— जियानकार्लो स्पोर्टेली

46

बूस्ट टोकनर के उपयोग से समाधान:

std::vector<std::string> vec;
using namespace boost;
tokenizer<escaped_list_separator<char> > tk(
   line, escaped_list_separator<char>('\\', ',', '\"'));
for (tokenizer<escaped_list_separator<char> >::iterator i(tk.begin());
   i!=tk.end();++i) 
{
   vec.push_back(*i);
}

— DTW
स्रोत

9

बूस्टर टोकन पूरी तरह से पूर्ण CSV मानक का समर्थन नहीं करता है, लेकिन कुछ त्वरित वर्कअराउंड हैं। देखें stackoverflow.com/questions/1120140/csv-parser-in-c/…

— रॉल्फ क्रिस्टेंसेन

3

क्या आपके पास अपनी मशीन पर पूरी बूस्ट लाइब्रेरी होनी चाहिए, या क्या आप ऐसा करने के लिए बस उनके कोड के एक उपसमूह का उपयोग कर सकते हैं? 256mb CSV पार्सिंग के लिए बहुत कुछ लगता है ..

— NPike

6

@ नाइक: आप bcp उपयोगिता का उपयोग कर सकते हैं जो केवल हेडर को निकालने के लिए बढ़ावा देती है जो आपको वास्तव में चाहिए।

— iljarn

46

मेरा संस्करण कुछ भी उपयोग नहीं कर रहा है लेकिन मानक C ++ 11 पुस्तकालय है। यह एक्सेल सीएसवी उद्धरण के साथ अच्छी तरह से मुकाबला करता है:

spam eggs,"foo,bar","""fizz buzz"""
1.23,4.567,-8.00E+09

कोड एक परिमित-राज्य मशीन के रूप में लिखा गया है और एक समय में एक वर्ण का उपभोग कर रहा है। मुझे लगता है कि इसके बारे में तर्क करना आसान है।

#include <istream>
#include <string>
#include <vector>

/// States of the per-character CSV row parser.
enum class CSVState {
    UnquotedField,  ///< outside any quoted section
    QuotedField,    ///< inside a quoted section
    QuotedQuote     ///< just saw '"' inside a quoted section: closing quote or first half of ""
};

/// Split one CSV line into fields (Excel dialect).
///
/// Commas inside a quoted section are kept as data, and a doubled quote ("")
/// inside a quoted section yields a single '"'. The result always contains at
/// least one (possibly empty) field.
///
/// @param row  a single line without its line terminator
/// @return     the fields of the line, in order
std::vector<std::string> readCSVRow(const std::string &row) {
    CSVState state = CSVState::UnquotedField;
    std::vector<std::string> fields {""};  // the field being built is always fields.back()
    for (char c : row) {
        switch (state) {
            case CSVState::UnquotedField:
                if (c == ',') {
                    fields.emplace_back();  // end of field
                } else if (c == '"') {
                    state = CSVState::QuotedField;
                } else {
                    fields.back().push_back(c);
                }
                break;
            case CSVState::QuotedField:
                if (c == '"') {
                    state = CSVState::QuotedQuote;
                } else {
                    fields.back().push_back(c);
                }
                break;
            case CSVState::QuotedQuote:
                if (c == ',') {
                    // , after closing quote
                    fields.emplace_back();
                    state = CSVState::UnquotedField;
                } else if (c == '"') {
                    // "" -> "
                    fields.back().push_back('"');
                    state = CSVState::QuotedField;
                } else {
                    // The previous quote closed the quoted section. Keep the
                    // character that follows it as ordinary field data instead
                    // of silently discarding it.
                    fields.back().push_back(c);
                    state = CSVState::UnquotedField;
                }
                break;
        }
    }
    return fields;
}

/// Read CSV file, Excel dialect. Accept "quoted fields ""with quotes"""
///
/// The stream is consumed one physical line at a time and each line is parsed
/// on its own, so a quoted field cannot span several lines.
///
/// @param in  stream to read until extraction of a line fails
/// @return    one vector of fields per line read
std::vector<std::vector<std::string>> readCSV(std::istream &in) {
    std::vector<std::vector<std::string>> table;
    std::string row;
    // Test the result of getline itself rather than eof() before the read:
    // the loop body then only ever runs for a line that was actually read.
    while (std::getline(in, row)) {
        table.push_back(readCSVRow(row));  // the temporary is moved into the table, not copied
    }
    return table;
}

— sastanin
स्रोत

6

धन्यवाद, मुझे लगता है कि यह सबसे पूर्ण उत्तर है, बहुत बुरा है जो यहां दफन है।

— मिहाई

तार के इस नेस्टेड वेक्टर आधुनिक प्रोसेसर के लिए एक नहीं है। अपनी कैशिंग क्षमता को फेंक देता है

— निकोलेओस गियोटिस

साथ ही इसमें वे सभी switch स्टेटमेंट भी हैं।

— निकोलोस गिओटिस

शीर्ष उत्तर ने मेरे लिए काम नहीं किया, क्योंकि मैं एक पुराने संकलक पर हूं। इस उत्तर ने काम किया, वेक्टर प्रारंभिककरण के लिए इसकी आवश्यकता हो सकती है:const char *vinit[] = {""}; vector<string> fields(vinit, end(vinit));

— dr_rk

31

सी ++ स्ट्रिंग टूलकिट लाइब्रेरी (StrTk) एक टोकन ग्रिड वर्ग कि आप या तो से डेटा लोड करने की अनुमति देता है पाठ फ़ाइलें, तार या चार बफ़र्स एक पंक्ति-स्तंभ फैशन में है, और उन्हें पार्स / प्रक्रिया के लिए।

आप पंक्ति परिसीमन और स्तंभ परिसीमन निर्दिष्ट कर सकते हैं या केवल डिफॉल्ट का उपयोग कर सकते हैं।

void foo()
{
   std::string data = "1,2,3,4,5\n"
                      "0,2,4,6,8\n"
                      "1,3,5,7,9\n";

   strtk::token_grid grid(data,data.size(),",");

   for(std::size_t i = 0; i < grid.row_count(); ++i)
   {
      strtk::token_grid::row_type r = grid.row(i);
      for(std::size_t j = 0; j < r.size(); ++j)
      {
         std::cout << r.get<int>(j) << "\t";
      }
      std::cout << std::endl;
   }
   std::cout << std::endl;
}

अधिक उदाहरण यहां देखे जा सकते हैं

— जे मकडजियन
स्रोत

1

हालांकि strtk दोहरे क्षेत्रों का समर्थन करता है , और यहां तक कि आसपास के कोट्स ( थ्रू) को भी अलग करता है options.trim_dquotes = true, लेकिन यह डबल डबलकोट (जैसे फ़ील्ड "She said ""oh no"", and left."को सी-स्ट्रिंग "She said \"oh no\", and left.") को हटाने का समर्थन नहीं करता है । आपको खुद ऐसा करना पड़ेगा।

— प्राची

1

उपयोग करते समय strtk, आपको नए-नए वर्णों को मैन्युअल रूप से हैंडल करना होगा जिसमें न्यूलाइन वर्ण होते हैं।

— 19

29

आप escaped_list_separator के साथ बूस्ट टोकनर का उपयोग कर सकते हैं।

escaped_list_separator csv का सुपरसेट प्राप्त करता है।बूस्ट :: tokenizer

यह केवल बूस्टर टोकन हेडर फ़ाइलों का उपयोग करता है, आवश्यक पुस्तकालयों को बढ़ावा देने के लिए कोई लिंकिंग नहीं।

यहाँ एक उदाहरण है, ( विवरण के लिए C ++ में बूस्ट टोकनलाइज़र के साथ पार्स सीएसवी फ़ाइल देखें Boost::tokenizer):

#include <iostream>     // cout, endl
#include <fstream>      // fstream
#include <vector>
#include <string>
#include <algorithm>    // copy
#include <iterator>     // ostream_operator
#include <boost/tokenizer.hpp>

int main()
{
    using namespace std;
    using namespace boost;
    string data("data.csv");

    ifstream in(data.c_str());
    if (!in.is_open()) return 1;

    typedef tokenizer< escaped_list_separator<char> > Tokenizer;
    vector< string > vec;
    string line;

    while (getline(in,line))
    {
        Tokenizer tok(line);
        vec.assign(tok.begin(),tok.end());

        // vector now contains strings from one row, output to cout here
        copy(vec.begin(), vec.end(), ostream_iterator<string>(cout, "|"));

        cout << "\n----------------------" << endl;
    }
}

— stefanB
स्रोत

और अगर आप एम्बेडेड नई लाइनों mybyteofcode.blogspot.com/2010/11/… को पार्स करने में सक्षम होना चाहते हैं ।

— stefanB

जब यह तकनीक काम करती है, तो मैंने पाया है कि इसका प्रदर्शन बहुत खराब है। प्रति पंक्ति दस फ़ील्ड के साथ 90000 लाइन वाली CSV फ़ाइल को पार्स करने में मेरे 2 गीगाहर्ट्ज़ Xeon पर लगभग 8 सेकंड लगते हैं। पायथन स्टैंडर्ड लाइब्रेरी सीएसवी मॉड्यूल लगभग 0.3 सेकंड में एक ही फाइल को पार्स करता है।

— रोब स्मॉलशायर

@ रब दिलचस्प है - पायथन सीएसवी अलग तरीके से क्या करता है?

— टॉफटिम

1

@RobSmallshire यह एक सरल उदाहरण कोड है जो उच्च प्रदर्शन वाला नहीं है। यह कोड प्रति पंक्ति सभी क्षेत्रों की प्रतियां बनाता है। उच्च प्रदर्शन के लिए आप विभिन्न विकल्पों का उपयोग करेंगे और प्रतियों को बनाने के बजाय बफर में फ़ील्ड्स के संदर्भों को वापस करेंगे।

— stefanB

29

CSV को पार्स करने के लिए स्पिरिट का इस्तेमाल करना ओवरकिल नहीं है। माइक्रो-पार्सिंग कार्यों के लिए आत्मा अच्छी तरह से अनुकूल है। उदाहरण के लिए, आत्मा 2.1 के साथ, यह उतना ही आसान है:

// Grammar: a list of doubles separated by ',' (`double_ % ','`), parsed from
// [first, last) with `space` as the skipper. Parsed values are stored in v;
// r receives the value returned by phrase_parse.
bool r = phrase_parse(first, last, double_ % ',', space, v);

वेक्टर v मानों से भर जाता है। नए Spirit 2.1 दस्तावेज़ों में, जो अभी-अभी Boost 1.41 के साथ जारी हुए हैं, इस विषय को छूने वाले ट्यूटोरियल की एक श्रृंखला है।

ट्यूटोरियल सरल से जटिल तक प्रगति करता है। CSV पार्सर्स को बीच में कहीं पेश किया जाता है और स्पिरिट का उपयोग करने की विभिन्न तकनीकों को छूता है। जनरेट कोड हाथ से लिखे गए कोड की तरह कड़ा है। उत्पन्न कोडांतरक की जाँच करें!

— जोएल डे गुज़मैन
स्रोत

18

वास्तव में यह ओवरकिल है, संकलन का समय बहुत बड़ा है और सरल "माइक्रो-पार्सिंग कार्यों" के लिए आत्मा का उपयोग करना अनुचित है।

— गर्डिनर

13

मैं यह भी बताना चाहूंगा कि ऊपर दिया गया कोड CSV को पार्स नहीं करता है, यह सिर्फ अल्पविराम द्वारा सीमांकित वेक्टर के प्रकार की एक श्रृंखला को पार्स करता है। यह उद्धरणों, विभिन्न प्रकार के स्तंभों आदि को संभालता नहीं है। कुछ के लिए कम 19 वोटों में, जो इस सवाल का जवाब देता है, मुझे थोड़ा संदिग्ध लगता है।

— गर्डिनर

9

@ गर्डनर बकवास। छोटे पार्सरों के लिए संकलित करने का समय इतना बड़ा नहीं है, लेकिन यह अप्रासंगिक भी है क्योंकि आप कोड को अपनी संकलन इकाई में भर लेते हैं और एक बार संकलित कर लेते हैं । तब आपको केवल इसे लिंक करने की आवश्यकता है और यह उतना ही कुशल है जितना इसे मिलता है। और आपकी अन्य टिप्पणी के लिए, सीएसवी की कई बोलियां हैं, क्योंकि इसके लिए प्रोसेसर हैं। यह निश्चित रूप से एक बहुत उपयोगी बोली नहीं है, लेकिन इसे उद्धृत मूल्यों को संभालने के लिए तुच्छ रूप से बढ़ाया जा सकता है।

— कोनराड रुडोल्फ

11

@konrad: केवल "#include <बूस्ट / स्पिरिट / शामिल / qi.hpp>" सहित एक खाली फ़ाइल में केवल एक मुख्य और कुछ नहीं के साथ 9.7sec लेता है MSVC 2012 के साथ 2.ghz पर चल रहे एक Corei7। यह बेकार है। एक ही मशीन पर 2secs के तहत स्वीकृत उत्तर संकलित करता है, मुझे यह कल्पना करने से नफरत होगी कि 'उचित' Boost.Spirit उदाहरण कब तक संकलन के लिए ले जाएगा।

— गर्डिनर

11

@Gerdiner मुझे आपसे सहमत होना है कि सीवी प्रसंस्करण के रूप में कुछ के लिए भावना का उपयोग करने में ओवरहेड बहुत बढ़िया है।

18

यदि आप CSV को सही ढंग से पार्स करने की परवाह करते हैं, तो यह कोड वह कर देगा... हालाँकि अपेक्षाकृत धीरे, क्योंकि यह एक समय में एक ही वर्ण पर काम करता है।

 // Parses a whole CSV document held in memory, one character at a time.
 //
 // - Fields are separated by ','; rows end at '\n' or '\r'. Consecutive
 //   line-break characters after a row are treated as a single row end.
 // - Inside a quoted section, commas and line breaks are data, and a doubled
 //   quote ("") stands for one literal quote character.
 // - A row left unfinished at the end of the input (no final line break) is
 //   still emitted, including an empty field after a trailing comma.
 //
 // csvSource: the complete CSV text.
 // lines:     cleared, then filled with one vector of fields per row.
 void ParseCSV(const std::string& csvSource, std::vector<std::vector<std::string> >& lines)
    {
       bool inQuote(false);
       bool newLine(false);     // true right after a row was closed by a line break
       bool pendingRow(false);  // true once the current row has received any input
       std::string field;
       lines.clear();
       std::vector<std::string> line;

       const std::string::size_type length = csvSource.size();
       for (std::string::size_type pos = 0; pos < length; ++pos)
       {
          const char ch = csvSource[pos];
          switch (ch)
          {
          case '"':
             newLine = false;
             pendingRow = true;
             if (inQuote && pos + 1 < length && csvSource[pos + 1] == '"')
             {
                // Doubled quote inside a quoted section: one literal quote.
                field += '"';
                ++pos;
             }
             else
             {
                inQuote = !inQuote;
             }
             break;

          case ',':
             newLine = false;
             pendingRow = true;
             if (inQuote)
             {
                field += ch;
             }
             else
             {
                line.push_back(field);
                field.clear();
             }
             break;

          case '\n':
          case '\r':
             if (inQuote)
             {
                field += ch;
             }
             else if (!newLine)
             {
                // First line-break character after row content: close the row.
                line.push_back(field);
                lines.push_back(line);
                field.clear();
                line.clear();
                newLine = true;
                pendingRow = false;
             }
             break;

          default:
             newLine = false;
             pendingRow = true;
             field.push_back(ch);
             break;
          }
       }

       // Flush a final row that was not terminated by a line break. Keying on
       // pendingRow (not on field being non-empty) keeps a trailing empty field.
       if (pendingRow)
       {
          line.push_back(field);
          lines.push_back(line);
       }
    }

— माइकल
स्रोत

AFAICT यह एम्बेडेड उद्धरण चिह्नों को सही ढंग से नहीं संभालेगा (जैसे "इस स्ट्रिंग में" "एम्बेडेड उद्धरण चिह्न हैं" "", "फू", 1))

— जेरेमी फ्रेज़र

14

CSV फ़ाइलों के लिए Boost Tokenizer के escaped_list_separator का उपयोग करते समय निम्न बातों का ध्यान रखना चाहिए:

इसके लिए भागने-चरित्र की आवश्यकता होती है (डिफ़ॉल्ट बैक-स्लैश - \)
इसके लिए स्प्लिटर / सेपरेटर-कैरेक्टर (डिफ़ॉल्ट अल्पविराम - ,) की आवश्यकता होती है
इसके लिए एक बोली-वर्ण (डिफ़ॉल्ट उद्धरण - ") चाहिए

विकी द्वारा निर्दिष्ट CSV प्रारूप बताता है कि डेटा फ़ील्ड में उद्धरणों में विभाजक हो सकते हैं (समर्थित):

1997, फोर्ड, E350, "सुपर, शानदार ट्रक"

विकी द्वारा निर्दिष्ट CSV प्रारूप में कहा गया है कि सिंगल कोट्स को दोहरे-उद्धरणों के साथ संभाला जाना चाहिए (escaped_list_separator सभी उद्धरण वर्णों को हटा देगा):

1997,Ford,E350,"Super ""luxurious"" truck"

CSV प्रारूप निर्दिष्ट नहीं करता है कि किसी भी बैक-स्लेश वर्णों को छीन लिया जाना चाहिए (escaped_list_separator सभी एस्केप वर्णों को हटा देगा)।

वृद्धि के डिफ़ॉल्ट व्यवहार को ठीक करने के लिए एक संभावित कार्य-आसन escaped_list_separator:

पहले सभी बैक-स्लेश वर्णों (\) को दो बैक-स्लेश वर्णों (\\) से बदल दें, ताकि वे छीन न जाएं।
दूसरे, सभी दोहरे उद्धरणों ("") को एक बैक-स्लैश वर्ण और एक उद्धरण (\") से बदलें

इस कार्य के आसपास के साइड-इफेक्ट है कि खाली डेटा-फ़ील्ड जो दोहरे-उद्धरण द्वारा दर्शाए गए हैं, एकल-उद्धरण-टोकन में बदल जाएंगे। जब टोकन के माध्यम से पुनरावृत्ति होती है, तो किसी को यह देखना होगा कि क्या टोकन एकल-उद्धरण है, और इसे एक खाली स्ट्रिंग की तरह व्यवहार करें।

सुंदर नहीं है, लेकिन यह काम करता है, जब तक कि उद्धरणों के भीतर नई रूपरेखाएं नहीं हैं।

— रॉल्फ क्रिस्टेंसन
स्रोत

8

आप मेरे FOSS प्रोजेक्ट CSVfix ( अद्यतन लिंक) को देखना चाह सकते हैं ) , जो C ++ में लिखा गया CSV स्ट्रीम एडिटर है। CSV पार्सर कोई पुरस्कार नहीं है, लेकिन नौकरी करता है और पूरा पैकेज वह कर सकता है जो आपको बिना किसी कोड को लिखे आपको चाहिए।

देखें alib / src / a_csv.cpp सीएसवी पार्सर के लिए, और csvlib / src / csved_ioman.cpp ( IOManager::ReadCSV) एक उपयोग उदाहरण के लिए।

— CXW
स्रोत

बहुत अच्छा लगता है ... स्टेटस बीटा / प्रोडक्शन के बारे में क्या?

— न्यूरो

स्थिति "विकास में" है, जैसा कि संस्करण संख्याओं द्वारा सुझाया गया है। मुझे वास्तव में संस्करण 1.0 में जाने से पहले उपयोगकर्ताओं से अधिक फीड बैक की आवश्यकता है। इसके अलावा मेरे पास कुछ और सुविधाएँ हैं जिन्हें मैं जोड़ना चाहता हूँ, CSV से XML उत्पादन के साथ।

इसे बुकमार्क करना, और अगली बार जब मैं उन अद्भुत मानक CSV फ़ाइलों से निपटने की कोशिश करूंगा, तो इसे दूंगा ...

— न्यूरो

8

जैसा कि सभी CSV प्रश्न यहाँ पुनर्निर्देशित होते हैं, मुझे लगता है कि मैं यहाँ अपना उत्तर पोस्ट करूँगा। यह उत्तर सीधे पूछने वाले के प्रश्न को संबोधित नहीं करता है। मैं एक धारा में पढ़ना चाहता था जिसे सीएसवी प्रारूप में जाना जाता है, और प्रत्येक क्षेत्र के प्रकार भी पहले से ही ज्ञात थे। बेशक, नीचे दी गई विधि का उपयोग प्रत्येक क्षेत्र को एक स्ट्रिंग प्रकार होने के लिए किया जा सकता है।

एक उदाहरण के रूप में कि मैं CSV इनपुट स्ट्रीम का उपयोग कैसे करना चाहता था, निम्नलिखित इनपुट पर विचार करें ( CSV पर विकिपीडिया के पृष्ठ से लिया गया है ):

// Sample CSV text used as test input: a header row followed by four records.
// It includes quoted fields containing commas, doubled quotes ("") inside
// quoted fields, empty quoted fields, and one quoted field that contains an
// embedded newline (the last record continues on the next source line through
// a backslash line continuation inside the string literal).
const char input[] =
"Year,Make,Model,Description,Price\n"
"1997,Ford,E350,\"ac, abs, moon\",3000.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",\"\",5000.00\n"
"1996,Jeep,Grand Cherokee,\"MUST SELL!\n\
air, moon roof, loaded\",4799.00\n"
;

फिर, मैं इस तरह से डेटा को पढ़ने में सक्षम होना चाहता था:

std::istringstream ss(input);

// First record: the five column titles.
std::string title[5];
csv_istream(ss) >> title[0] >> title[1] >> title[2] >> title[3] >> title[4];

// Remaining records: one typed variable per column, read until extraction fails.
int year;
std::string make;
std::string model;
std::string desc;
float price;
while (csv_istream(ss) >> year >> make >> model >> desc >> price) {
    //...do something with the record...
}

यही वह उपाय था, जिसे मैंने खत्म किया।

// Wrapper around a std::istream reference that extracts one comma-separated
// field per operator>> call. Fields end at ',' or '\n'. std::string targets
// additionally understand double-quoted fields with "" as an escaped quote.
// The wrapper holds only a reference, so the stream must outlive it.
// NOTE(review): is_int and is_signed_int are not defined in this block; they
// need to be declared before this struct for it to compile.
struct csv_istream {
    std::istream &is_;
    csv_istream (std::istream &is) : is_(is) {}
    // Consumes spaces and tabs at the current read position, stopping at the
    // first other character or when the stream is no longer good.
    void scan_ws () const {
        while (is_.good()) {
            int c = is_.peek();
            if (c != ' ' && c != '\t') break;
            is_.get();
        }
    }
    // Reads characters up to and including the next ',' or '\n' (the delimiter
    // is consumed but never stored). When s is non-null the field text is
    // appended to *s; when s is null the characters are discarded.
    void scan (std::string *s = 0) const {
        // Spaces/tabs are held back in ws and only copied to *s once a
        // non-blank character follows, so blanks just before the delimiter
        // are not stored.
        std::string ws;
        int c = is_.get();
        if (is_.good()) {
            do {
                if (c == ',' || c == '\n') break;
                if (s) {
                    ws += c;
                    if (c != ' ' && c != '\t') {
                        *s += ws;
                        ws.clear();
                    }
                }
                c = is_.get();
            } while (is_.good());
            // End of input reached after at least one character was read:
            // reset the stream state so that a last field without a trailing
            // newline does not leave the stream in a failed state.
            if (is_.eof()) is_.clear();
        }
    }
    // Converts field text into a value of type T. Primary template: parse
    // with operator>> on a temporary std::istringstream.
    template <typename T, bool> struct set_value {
        void operator () (std::string in, T &v) const {
            std::istringstream(in) >> v;
        }
    };
    // Specialization selected when the bool argument is true (used below for
    // integer types): converts with strtoll / strtoull. Base 0 lets the C
    // library choose the base from the text's prefix.
    template <typename T> struct set_value<T, true> {
        template <bool SIGNED> void convert (std::string in, T &v) const {
            if (SIGNED) v = ::strtoll(in.c_str(), 0, 0);
            else v = ::strtoull(in.c_str(), 0, 0);
        }
        void operator () (std::string in, T &v) const {
            convert<is_signed_int<T>::val>(in, v);
        }
    };
    // Generic extraction: reads one raw field with scan() (quotes are not
    // interpreted here) and converts it to T via set_value.
    template <typename T> const csv_istream & operator >> (T &v) const {
        std::string tmp;
        scan(&tmp);
        set_value<T, is_int<T>::val>()(tmp, v);
        return *this;
    }
    // String extraction: v is cleared, leading spaces/tabs are skipped, then
    // either a plain field or a double-quoted field is read.
    const csv_istream & operator >> (std::string &v) const {
        v.clear();
        scan_ws();
        if (is_.peek() != '"') scan(&v);
        else {
            std::string tmp;
            // Consume the opening quote, then read up to the next quote
            // (getline also consumes that quote).
            is_.get();
            std::getline(is_, tmp, '"');
            // A quote immediately following the one just consumed is a doubled
            // ("") quote: keep the text read so far plus one literal quote and
            // continue reading up to the next quote.
            while (is_.peek() == '"') {
                v += tmp;
                v += is_.get();
                std::getline(is_, tmp, '"');
            }
            v += tmp;
            // Discard whatever follows the closing quote, through the next
            // ',' or '\n'.
            scan();
        }
        return *this;
    }
    // Applies a function-pointer manipulator of the form T&(*)(T&) to the
    // wrapped stream.
    template <typename T>
    const csv_istream & operator >> (T &(*manip)(T &)) const {
        is_ >> manip;
        return *this;
    }
    // True as long as the wrapped stream has not failed; lets an extraction
    // chain be used directly as a loop condition.
    operator bool () const { return !is_.fail(); }
};

C ++ 11 में नए अभिन्न लक्षण टेम्पलेट द्वारा सरल किए जा सकने वाले निम्नलिखित सहायकों के साथ:

// is_signed_int<T>::val is true exactly for short, int, long and long long.
template <typename T>
struct is_signed_int                     { enum { val = false }; };
template <>
struct is_signed_int<short>              { enum { val = true }; };
template <>
struct is_signed_int<int>                { enum { val = true }; };
template <>
struct is_signed_int<long>               { enum { val = true }; };
template <>
struct is_signed_int<long long>          { enum { val = true }; };

// is_unsigned_int<T>::val is true exactly for the unsigned counterparts.
template <typename T>
struct is_unsigned_int                     { enum { val = false }; };
template <>
struct is_unsigned_int<unsigned short>     { enum { val = true }; };
template <>
struct is_unsigned_int<unsigned int>       { enum { val = true }; };
template <>
struct is_unsigned_int<unsigned long>      { enum { val = true }; };
template <>
struct is_unsigned_int<unsigned long long> { enum { val = true }; };

// is_int<T>::val is true when T is any of the eight types listed above.
template <typename T>
struct is_int {
    enum {
        val = (is_signed_int<T>::val || is_unsigned_int<T>::val)
    };
};

इसे ऑनलाइन आज़माएं!

— jxh
स्रोत

6

मैंने केवल हेडर, C ++ 11 CSV पार्सर लिखा है । यह अच्छी तरह से परीक्षण किया गया है, तेज है, संपूर्ण CSV युक्ति (उद्धरण में फ़ील्ड, सीमांकक / टर्मिनेटर, उद्धरण भागने आदि) का समर्थन करता है, और CSVs के लिए खाते में कॉन्फ़िगर करने योग्य है जो विनिर्देश का पालन नहीं करते हैं।

विन्यास एक धाराप्रवाह इंटरफ़ेस के माध्यम से किया जाता है:

// Build a parser over standard input and override its defaults through the
// chained (fluent) setter calls. The constructor accepts any input stream.
CsvParser parser = CsvParser(std::cin)
  .delimiter(';')    // fields are delimited by ; instead of ,
  .quote('\'')       // quoted fields use ' instead of "
  .terminator('\0'); // rows are terminated by \0 instead of by \r\n, \n, or \r

पार्सिंग लूप के लिए एक सीमा है:

#include <iostream>
#include "../parser.hpp"

using namespace aria::csv;

int main() {
  std::ifstream f("some_file.csv");
  CsvParser parser(f);

  for (auto& row : parser) {
    for (auto& field : row) {
      std::cout << field << " | ";
    }
    std::cout << std::endl;
  }
}

— m0meni
स्रोत

1

अच्छा काम है, लेकिन आपको तीन और चीजों को जोड़ने की जरूरत है: (1) रीड हेडर (2) नाम से फ़ील्ड इंडेक्सिंग प्रदान करें (3) स्ट्रिंग के एक ही वेक्टर का पुन: उपयोग करके लूप में मेमोरी को फिर से

— विभाजित न करें

@MaksymGanenko मैं # 3 करता हूं। क्या आप # 2 पर विस्तार से बता सकते हैं?

— m0meni

1

फ़ील्ड को किसी पंक्ति में स्थिति के आधार पर प्राप्त करना बहुत उपयोगी है, लेकिन हेडर में दिया गया नाम (CSV तालिका की पहली पंक्ति में)। उदाहरण के लिए, मैं "दिनांक" फ़ील्ड के साथ CSV तालिका की अपेक्षा करता हूं, लेकिन मुझे नहीं पता कि पंक्ति में "दिनांक" फ़ील्ड इंडेक्स क्या है।

— मेक्सिकम गेनेंको

1

@MaksymGanenko आह मैं देख रहा हूं कि आपका क्या मतलब है। नहीं है github.com/ben-strasser/fast-cpp-csv-parser के लिए जब आप संकलन समय पर अपनी CSV के स्तंभों को पता है, और यह शायद बेहतर खान से है। मैं उन मामलों के लिए एक सीएसवी पार्सर चाहता था जहां आप कई अलग-अलग सीएसवी के लिए समान कोड का उपयोग करना चाहते थे और यह नहीं जानते कि वे समय से पहले क्या दिखते हैं। इसलिए मैं शायद # 2 नहीं जोड़ूंगा, लेकिन मैं भविष्य में कभी भी # 1 जोड़ दूंगा।

— m0meni

5

एक और सीएसवी I / O पुस्तकालय यहाँ पाया जा सकता है:

http://code.google.com/p/fast-cpp-csv-parser/

#include "csv.h"

// Reads the "vendor", "size" and "speed" columns of "ram.csv" row by row.
int main(){
  io::CSVReader<3> in("ram.csv");
  in.read_header(io::ignore_extra_column, "vendor", "size", "speed");

  // One variable per requested column, refilled by every read_row call.
  std::string vendor;
  int size;
  double speed;

  while(in.read_row(vendor, size, speed)){
    // do stuff with the data
  }
}

— हेगार्ड फ्लिक
स्रोत

2

अच्छा है, लेकिन यह आपको संकलन समय पर कॉलम की संख्या चुनने के लिए मजबूर करता है। कई अनुप्रयोगों के लिए बहुत उपयोगी नहीं है।

— quant_dev 20

5

C ++ 11 में लोकी एस्टरी के उत्तर के समान एक और समाधान । यहाँ पंक्तियाँ std::tupleएक दिए गए प्रकार की हैं। कोड एक पंक्ति को स्कैन करता है, फिर प्रत्येक सीमांकक तक स्कैन करता है, और उसके बाद मान को सीधे टूपल (थोड़ा सा टेम्पलेट कोड के साथ) में परिवर्तित करता है।

// Bind each row by const reference so the tuple (including its std::string
// element) is not copied on every iteration.
for (const auto &row : csv<std::string, int, float>(file, ',')) {
    std::cout << "first col: " << std::get<0>(row) << std::endl;
}

फ़ायदे (Advantages):

उपयोग करने के लिए काफी साफ और सरल, केवल C ++ 11।
operator>> के माध्यम से std::tuple<t1, ...> में स्वचालित टाइप रूपांतरण।

क्या कमी है:

एस्केपिंग (escaping) का समर्थन
विकृत सीएसवी के मामले में कोई त्रुटि से निपटने।

मुख्य कोड:

#include <cstddef>
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include <tuple>
#include <type_traits>

namespace csvtools {
    namespace detail {
        /// Extract the text up to the next @p delimiter from @p in and convert it,
        /// through a temporary stringstream and operator>>, into element @p idx of @p out.
        template <std::size_t idx, class... fields>
        void read_cell(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
            std::string text;
            std::getline(in, text, delimiter);
            std::stringstream converter(text);
            converter >> std::get<idx>(out);
        }
    }

    /// Terminal case: @p idx is the last element of the tuple, so read it and stop.
    template <std::size_t idx, class... fields>
    typename std::enable_if<(idx >= std::tuple_size<std::tuple<fields...>>::value - 1)>::type
    read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
        detail::read_cell<idx>(in, out, delimiter);
    }

    /// General case: read element @p idx, then continue with element @p idx + 1.
    /// enable_if selects the terminal overload once the last element is reached.
    template <std::size_t idx, class... fields>
    typename std::enable_if<(idx < std::tuple_size<std::tuple<fields...>>::value - 1)>::type
    read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
        detail::read_cell<idx>(in, out, delimiter);
        read_tuple<idx + 1, fields...>(in, out, delimiter);
    }
}

/// Range-style view over a delimited text stream. Each line is parsed into a
/// std::tuple whose element types are given by @p fields.
template <class... fields>
class csv {
    std::istream &in_;   ///< underlying stream (not owned)
    const char delim_;   ///< cell delimiter within a line
public:
    typedef std::tuple<fields...> value_type;
    class iterator;

    /// Wrap @p in, splitting each line at @p delim.
    csv(std::istream &in, const char delim) : in_(in), delim_(delim) {}

    /// Status of the underlying stream
    /// @{
    bool good() const { return in_.good(); }
    const std::istream &underlying_stream() const { return in_; }
    /// @}

    inline iterator begin();
    inline iterator end();
private:

    /// Read the next line of the stream and convert its cells into a tuple.
    value_type read_row() {
        std::string text;
        std::getline(in_, text);
        std::stringstream cells(text);

        value_type parsed;
        csvtools::read_tuple<0, fields...>(cells, parsed, delim_);
        return parsed;
    }
};

/// Input iterator over the rows of a csv object. Each increment calls
/// csv::read_row and stores the result. An iterator with a null parent is the
/// end sentinel.
template <class... fields>
class csv<fields...>::iterator {
    csv::value_type _row;
    csv *_parent;
public:
    typedef std::input_iterator_tag iterator_category;
    typedef csv::value_type         value_type;
    typedef std::size_t             difference_type;
    typedef csv::value_type *       pointer;
    typedef csv::value_type &       reference;

    /// Construct an empty/end iterator
    inline iterator() : _parent(nullptr) {}
    /// Construct an iterator at the beginning of the @p parent csv object.
    /// The first row is read immediately.
    inline iterator(csv &parent) : _parent(parent.good() ? &parent : nullptr) {
        ++(*this);
    }

    /// Read one row, if possible. Becomes the end iterator when the read fails.
    inline iterator &operator++() {
        if (_parent != nullptr) {
            _row = _parent->read_row();
            // Test fail() rather than good(): reading the last line of an input
            // that has no trailing newline succeeds but sets eofbit, and that
            // row must still be delivered. Only a read that extracted nothing
            // sets failbit and ends the iteration.
            if (_parent->underlying_stream().fail()) {
                _parent = nullptr;
            }
        }
        return *this;
    }

    /// Post-increment: returns a copy holding the row that was current before advancing.
    inline iterator operator++(int) {
        iterator copy = *this;
        ++(*this);
        return copy;
    }

    inline csv::value_type const &operator*() const {
        return _row;
    }

    inline csv::value_type const *operator->() const {
        return &_row;
    }

    /// Iterators are equal when they are the same object or both are at the end.
    /// The comparisons are const so they also work on const iterators.
    bool operator==(iterator const &other) const {
        return (this == &other) || (_parent == nullptr && other._parent == nullptr);
    }
    bool operator!=(iterator const &other) const {
        return !(*this == other);
    }
};

/// Iterator bound to this csv object; constructing it reads the first row.
template <class... fields>
typename csv<fields...>::iterator csv<fields...>::begin() {
    iterator first(*this);
    return first;
}

/// End sentinel: an iterator that is not bound to any csv object.
template <class... fields>
typename csv<fields...>::iterator csv<fields...>::end() {
    iterator sentinel;
    return sentinel;
}

मैंने GitHub पर एक छोटा सा काम करने वाला उदाहरण रखा ; मैं इसे कुछ संख्यात्मक डेटा पार्स करने के लिए उपयोग कर रहा हूं और इसने अपने उद्देश्य को पूरा किया है।

— Spak
स्रोत

1

आप इनलाइनिंग के बारे में परवाह नहीं कर सकते हैं, क्योंकि अधिकांश संकलक इसे अपने दम पर तय करते हैं। विज़ुअल C ++ में कम से कम मुझे यकीन है। यह आपके विधि विनिर्देश के स्वतंत्र रूप से इनलाइन विधि कर सकता है।

— मृ। पारसिक

1

यही कारण है कि मैंने उन्हें स्पष्ट रूप से चिह्नित किया है। जीसीसी और क्लैंग, जिनका मैं ज्यादातर उपयोग करता हूं, उनके स्वयं के सम्मेलन हैं। एक "इनलाइन" कीवर्ड सिर्फ एक प्रोत्साहन होना चाहिए।

— Spak

4

यहां एक यूनिकोड CSV पार्सर (wchar_t के साथ काम करता है) का एक और कार्यान्वयन है। मैंने इसका हिस्सा लिखा, जबकि जोनाथन लेफ़लर ने बाकी लिखा।

नोट: यह पार्सर एक्सेल के व्यवहार को यथासंभव बारीकी से दोहराने के उद्देश्य से है, विशेष रूप से टूटी हुई या विकृत सीएसवी फ़ाइलों को आयात करते समय ।

यह मूल प्रश्न है - मल्टीलाइन फ़ील्ड के साथ CSV फ़ाइल पार्स करना और दोहरे उद्धरणों से बच गया

यह SSCCE (लघु, स्व-निहित, सही उदाहरण) के रूप में कोड है।

#include <stdbool.h>
#include <wchar.h>
#include <wctype.h>

extern const wchar_t *nextCsvField(const wchar_t *p, wchar_t sep, bool *newline);

// Locates the field that follows the one beginning at p.
//   p       - first character of the current field
//   sep     - field separator, e.g. L',' or L';'
//   newline - set to true when the current field was ended by '\r' or '\n',
//             false when it was ended by sep (only written on success)
// Returns a pointer to the first character of the next field, or 0 when the
// current field is the last one in the text.
const wchar_t *nextCsvField(const wchar_t *p, wchar_t sep, bool *newline)
{
    // A field that opens with a double quote runs up to its closing quote;
    // a doubled quote ("") inside it is an escaped quote, not the end.
    if (p[0] == '"') {
        ++p;
        for (;;) {
            p = wcschr(p, L'"');
            // No further quote, or the quote is the very last character:
            // nothing can follow this field.
            if (p == 0 || p[1] == L'\0')
                return 0;
            if (p[1] != '"')
                break;          // genuine closing quote
            p += 2;             // step over the escaped quote
        }
    }

    // The field ends at the first LF, CR or separator from here on.
    wchar_t terminators[4] = L"\n\r ";
    terminators[2] = sep;
    p = wcspbrk(p, terminators);
    if (p == 0)
        return 0;

    const bool crlf = (p[0] == '\r' && p[1] == '\n');
    *newline = (p[0] == '\r' || p[0] == '\n');

    // "\r\n" counts as one terminator; anything else is a single character.
    return p + (crlf ? 2 : 1);
}

// Copies the text of one CSV field into buffer as a NUL-terminated string.
//   fld_s  - first character of the field
//   fld_e  - one past the last character of the field
//   buffer - destination, buflen wide characters large
// For a field that starts with a double quote the enclosing quotes are
// dropped and every doubled quote ("") is collapsed to a single quote; text
// following the closing quote is copied verbatim.
// Returns buffer on success, or 0 when the output reaches buflen - 1
// characters (buffer too small) or buflen is 0.
static wchar_t *csvFieldData(const wchar_t *fld_s, const wchar_t *fld_e, wchar_t *buffer, size_t buflen)
{
    // buffer + buflen - 1 would point before the buffer for buflen == 0.
    if (buflen == 0)
        return 0;

    wchar_t *dst = buffer;
    wchar_t *end = buffer + buflen - 1;
    const wchar_t *src = fld_s;

    if (*src == L'"')
    {
        const wchar_t *p = src + 1;
        while (p < fld_e && dst < end)
        {
            // Escaped quote: the look-ahead must stay inside the field, so it
            // is bounded by fld_e (the original compared against fld_s, which
            // could never be true and left "" sequences uncollapsed).
            if (p[0] == L'"' && p+1 < fld_e && p[1] == L'"')
            {
                *dst++ = p[0];
                p += 2;
            }
            else if (p[0] == L'"')
            {
                // Closing quote: leave quoted mode.
                p++;
                break;
            }
            else
                *dst++ = *p++;
        }
        src = p;
    }
    // Unquoted field, or whatever trails the closing quote.
    while (src < fld_e && dst < end)
        *dst++ = *src++;
    if (dst >= end)
        return 0;
    *dst = L'\0';
    return(buffer);
}

// Debug helper: prints the input line, then every field found by
// nextCsvField() both raw and after unquoting with csvFieldData().
// The loop stops when nextCsvField() returns 0, so the final field of the
// text is not printed.
static void dissect(const wchar_t *line)
{
    const wchar_t *start = line;
    const wchar_t *next;
    bool     eol;
    const size_t len = wcslen(line);

    // The '*' precision of wprintf takes an int and size_t is printed with
    // %zu; the length minus one drops the last character but must not wrap
    // for an empty line.
    wprintf(L"Input %3zu: [%.*ls]\n", len, (int)(len > 0 ? len - 1 : 0), line);
    while ((next = nextCsvField(start, L',', &eol)) != 0)
    {
        wchar_t buffer[1024];
        wprintf(L"Raw Field: [%.*ls] (eol = %d)\n", (int)(next - start - eol), start, (int)eol);
        // next - 1 is the terminator of the current field.
        if (csvFieldData(start, next-1, buffer, sizeof(buffer)/sizeof(buffer[0])) != 0)
            wprintf(L"Field %3zu: [%ls]\n", wcslen(buffer), buffer);
        start = next;
    }
}

// Built-in sample text passed to dissect() from main(). It contains a quoted
// field spanning several lines with an escaped ("") double quote, followed
// by text whose quote does not open the field.
static const wchar_t multiline[] =
   L"First field of first row,\"This field is multiline\n"
    "\n"
    "but that's OK because it's enclosed in double quotes, and this\n"
    "is an escaped \"\" double quote\" but this one \"\" is not\n"
    "   \"This is second field of second row, but it is not multiline\n"
    "   because it doesn't start \n"
    "   with an immediate double quote\"\n"
    ;

// Dissects every line read from standard input, then the built-in
// multiline sample.
int main(void)
{
    wchar_t line[1024];
    const int capacity = sizeof(line) / sizeof(line[0]);

    while (fgetws(line, capacity, stdin) != 0)
        dissect(line);

    dissect(multiline);
    return 0;
}

— sashoalm
स्रोत

3

मुझे CSV फ़ाइलों को पार्स करने के लिए एक आसान-से-उपयोग C ++ लाइब्रेरी की आवश्यकता थी, लेकिन कोई भी उपलब्ध नहीं मिल सका, इसलिए मैंने एक का निर्माण किया। Rapidcsv एक C ++ 11 हेडर-ओनली लाइब्रेरी है जो पसंद के डेटाटाइप में, पार्स किए गए कॉलम (या पंक्तियों) को वैक्टर के रूप में सीधे एक्सेस देता है। उदाहरण के लिए:

#include <iostream>
#include <vector>
#include <rapidcsv.h>

// Example: load a CSV document with rapidcsv and read one column by its
// header label as a vector of floats.
int main()
{
  rapidcsv::Document doc("../tests/msft.csv");

  // Fetch the column labelled "Close", converted to float.
  std::vector<float> close = doc.GetColumn<float>("Close");
  std::cout << "Read " << close.size() << " values." << std::endl;
}

— d99kris
स्रोत

1

अच्छा काम है, लेकिन हेडर खाली लेबल है, तो पुस्तकालय ठीक से काम नहीं करता है। यह एक्सेल / लिब्रे ऑफिस NxN टेबल के लिए विशिष्ट है। इसके अलावा, यह डेटा की अंतिम पंक्ति को छोड़ सकता है। दुर्भाग्य से, आपका दायित्व मजबूत नहीं है।

— मैक्सीम गेनेंको

1

प्रतिक्रिया के लिए धन्यवाद @MaksymGanenko मैंने अंतिम लाइनों w / o अनुगामी लाइन ब्रेक के लिए "डेटा की अंतिम पंक्ति" बग को ठीक कर दिया है। के रूप में उल्लेखित अन्य मुद्दे के लिए - "खाली लेबल वाले हेडर" - मुझे यकीन नहीं है कि यह क्या संदर्भित करता है? लाइब्रेरी को खाली लेबल (उद्धृत और गैर-उद्धृत दोनों) को संभालना चाहिए। यह हेडर पंक्ति / कॉलम के बिना CSV को भी पढ़ सकता है, लेकिन तब उपयोगकर्ता को इसे निर्दिष्ट करने की आवश्यकता होती है (कोल शीर्षक आईडी -1 और पंक्ति शीर्षक आईडी -1)। कृपया कुछ और विवरण प्रदान करें या GitHub पृष्ठ पर बग की रिपोर्ट करें यदि आपके पास कुछ विशिष्ट उपयोग-मामला है जिसे आप समर्थित देखना चाहते हैं। धन्यवाद!

— d99kris

2

क्षमा करें, लेकिन यह सब कोड की कुछ पंक्तियों को छिपाने के लिए विस्तृत वाक्यविन्यास की तरह प्रतीत होता है।

ऐसा क्यों नहीं:

/**

  Read the next line from a CSV file and split it on commas.

  Quoting is not interpreted: every ',' separates two fields. A line that is
  not terminated by a newline (typically the last line of the file) still
  yields its final field. Lines longer than the internal buffer are returned
  in several pieces.

  @param[in]  fp  file pointer to open file; a null pointer yields no fields
  @param[out] vls vector that receives the fields of the line; it is left
                  empty at end of file or on a read error

  */
void readCSV( FILE *fp, std::vector<std::string>& vls )
{
    vls.clear();
    if( ! fp )
        return;
    char buf[10000];
    // Let fgets use the whole buffer (the original passed 999).
    if( ! fgets( buf, sizeof buf, fp ) )
        return;
    const std::string s = buf;

    // loop over columns
    std::string::size_type start = 0;
    for( ;; ) {
        const std::string::size_type stop = s.find_first_of(",\n",start);
        if( stop == std::string::npos )
            break;
        vls.push_back( s.substr(start,stop-start) );
        start = stop + 1;
    }

    // No trailing newline: what follows the last comma is still a field.
    if( ! s.empty() && s[s.size()-1] != '\n' )
        vls.push_back( s.substr(start) );
}

// Demo driver: opens the file named by the first command-line argument,
// reads its first three lines with readCSV() and prints the fourth field of
// the third line. Returns 1 when the argument is missing, the file cannot be
// opened or the third line has fewer than four fields.
int _tmain(int argc, _TCHAR* argv[])
{
    if( argc < 2 )
        return 1;

    std::vector<std::string> vls;
    FILE * fp = fopen( argv[1], "r" );
    if( ! fp )
        return 1;
    // Each call overwrites vls, so after three calls it holds line 3.
    readCSV( fp, vls );
    readCSV( fp, vls );
    readCSV( fp, vls );
    fclose( fp );

    // vls[3] is only valid when the line really has a fourth field.
    if( vls.size() <= 3 ) {
        std::cout << "row 3 has fewer than 4 columns\n";
        return 1;
    }
    std::cout << "row 3, col 4 is " << vls[3].c_str() << "\n";

    return 0;
}

— ravenspoint
स्रोत

एर्म, ",\n"स्ट्रिंग में क्यों होगा ?

— टिम्मम नोव

@Timmmm स्ट्रिंग वर्ग की मूल विधि को देखते हैं, और आप देखेंगे कि इसमें कई अक्षर हैं, \ n न्यूलाइन वर्ण है, इसलिए यह इस उदाहरण में एक एकल वर्ण के रूप में गिना जाता है। यह एक पूरे के रूप में पूरे मूल्य के लिए खोज नहीं करता है। यह प्रत्येक व्यक्ति के चरित्र को खोज रहा है; अर्थात् अल्पविराम या न्यूलाइन। रूट पहले अक्षर की स्थिति को लौटाता है, और -1 अगर यह न तो पाता है, जिसका अर्थ है कि यह लाइन पढ़ना समाप्त हो गया है। fp आंतरिक रूप से फ़ाइल में स्थिति का ट्रैक रखता है, इसलिए प्रत्येक कॉल करने के लिए readCSV इसे एक समय में एक पंक्ति ले जाता है।

— मार्टन शट

2

मैट्रिक्स पढ़ने के लिए यहाँ कोड है, ध्यान दें कि आपके पास matlab में एक csvwrite फ़ंक्शन भी है

// Reads a comma-separated file into a matrix of strings and prints it to
// std::cout, one row per line with every cell followed by a space.
// Lines that produce no cells (empty lines) are skipped. A file that cannot
// be opened simply produces no output.
void loadFromCSV( const std::string& filename )
{
    std::ifstream       file( filename.c_str() );
    std::vector< std::vector<std::string> >   matrix;
    std::vector<std::string>   row;
    std::string                line;
    std::string                cell;

    // Test the result of getline itself: looping on the stream state and
    // reading afterwards re-processes the previous line once the stream has
    // hit end-of-file, duplicating the last row of a file that has no
    // trailing newline.
    while( std::getline(file,line) )
    {
        std::stringstream lineStream(line);
        row.clear();

        while( std::getline( lineStream, cell, ',' ) )
            row.push_back( cell );

        if( !row.empty() )
            matrix.push_back( row );
    }

    for( std::size_t i=0; i<matrix.size(); i++ )
    {
        for( std::size_t j=0; j<matrix[i].size(); j++ )
            std::cout << matrix[i][j] << " ";

        std::cout << std::endl;
    }
}

— जिम एम।
स्रोत

2

आप fopen और fscanf फ़ंक्शन का उपयोग करके .csv फ़ाइल खोल और पढ़ सकते हैं, लेकिन महत्वपूर्ण बात डेटा को पार्स करना है। डेटा को पार्स करने का सबसे आसान तरीका सीमांकक (delimiter) का उपयोग करना है। .csv के मामले में सीमांकक ',' है।

मान लीजिए कि आपका data1.csv फ़ाइल निम्नानुसार है:

A,45,76,01
B,77,67,02
C,63,76,03
D,65,44,04

आप डेटा को टोकन में बाँटकर char ऐरे में संग्रहीत कर सकते हैं और बाद में उपयुक्त रूपांतरण के लिए atoi() आदि फ़ंक्शन का उपयोग कर सकते हैं

FILE *fp;
char str1[10], str2[10], str3[10], str4[10];

fp = fopen("G:\\data1.csv", "r");
if(NULL == fp)
{
    printf("\nError in opening file.");
    return 0;
}
/* One conversion per destination buffer (the original format listed seven
 * conversions for four arguments). Every conversion is limited to 9
 * characters so it cannot overflow its 10-byte buffer, and the loop only
 * continues while all four fields were converted, so a malformed line ends
 * the loop instead of spinning forever. The leading blank in the format
 * skips the newline left over from the previous record. */
while(4 == fscanf(fp, " %9[^,], %9[^,], %9[^,], %9s", str1, str2, str3, str4))
{
    printf("\n%s %s %s %s", str1, str2, str3, str4);
}
fclose(fp);

%[^,] में ^ मिलान को उलट देता है, अर्थात यह ऐसी किसी भी स्ट्रिंग से मेल खाता है जिसमें अल्पविराम नहीं है; उसके बाद लिखा गया , उस अल्पविराम से मेल खाता है जो पिछली स्ट्रिंग को समाप्त करता है।

— अमृता घोडके
स्रोत

2

पहली चीज जो आपको करने की ज़रूरत है वह यह सुनिश्चित करना है कि फ़ाइल मौजूद है। इसके लिए आपको बस दिए गए पथ पर फ़ाइल स्ट्रीम खोलने की कोशिश करनी है। फ़ाइल स्ट्रीम खोलने के बाद stream.fail() का उपयोग करके देखें कि वह अपेक्षा के अनुसार खुली या नहीं।

// Reports whether fileName can be opened for reading.
bool fileExists(string fileName)
{
// The stream is opened only as a probe; it is closed again before returning.
ifstream probe(fileName.c_str());
const bool opened = !probe.fail();
probe.close();
return opened;
}

आपको यह भी सत्यापित करना होगा कि प्रदान की गई फ़ाइल सही प्रकार की फ़ाइल है। इसे पूरा करने के लिए आपको फ़ाइल एक्सटेंशन मिलने तक फ़ाइल पथ देखने की आवश्यकता है। एक बार जब आपके पास फ़ाइल एक्सटेंशन होता है, तो सुनिश्चित करें कि यह एक .csv फ़ाइल है।

// Returns true when the text starting at the last '.' of filename is exactly
// ".csv" (case-sensitive). A name without any '.' is compared as a whole and
// therefore never matches.
bool verifyExtension(string filename)
{
string::size_type dot = filename.rfind('.');
if (dot == string::npos)
    dot = 0;

return filename.substr(dot) == ".csv";
}

यह फ़ंक्शन फ़ाइल एक्सटेंशन को लौटा देगा जो बाद में एक त्रुटि संदेश में उपयोग किया जाता है।

// Returns the extension of filename including its leading '.', taken from
// the last '.' in the name. When the name contains no '.', or its only '.'
// is the very first character, the marker string "NO FILE" is returned.
string getExtension(string filename)
{
const string::size_type dot = filename.rfind('.');
if (dot == string::npos || dot == 0)
    return "NO FILE";

return filename.substr(dot);
}

यह फ़ंक्शन वास्तव में ऊपर बनाई गई त्रुटि जांचों को कॉल करेगा और फिर फ़ाइल के माध्यम से पार्स करेगा।

void parseFile(string fileName)
{
    if (fileExists(fileName) && verifyExtension(fileName))
    {
        ifstream fs;
        fs.open(fileName.c_str());
        string fileCommand;

        while (fs.good())
        {
            string temp;

            getline(fs, fileCommand, '\n');

            for (unsigned int i = 0; i < fileCommand.length(); i++)
            {
                if (fileCommand[i] != ',')
                    temp += fileCommand[i];
                else
                    temp += " ";
            }

            if (temp != "\0")
            {
                // Place your code here to run the file.
            }
        }
        fs.close();
    }
    else if (!fileExists(fileName))
    {
        cout << "Error: The provided file does not exist: " << fileName << endl;

        if (!verifyExtension(fileName))
        {
            if (getExtension(fileName) != "NO FILE")
                cout << "\tCheck the file extension." << endl;
            else
                cout << "\tThere is no file in the provided path." << endl;
        }
    }
    else if (!verifyExtension(fileName)) 
    {
        if (getExtension(fileName) != "NO FILE")
            cout << "Incorrect file extension provided: " << getExtension(fileName) << endl;
        else
            cout << "There is no file in the following path: " << fileName << endl;
    }
}

— एलिजाबेथ कार्ड
स्रोत

2

जब आप किसी चीज का इतना सुंदर उपयोग करते हैं तो आप गर्व महसूस करते हैं boost::spirit

यहाँ CSV विनिर्देशों (CSV specs) के (लगभग) अनुरूप एक पार्सर बनाने का मेरा प्रयास है (मुझे फ़ील्ड के भीतर लाइन ब्रेक की ज़रूरत नहीं थी; साथ ही कॉमा के आसपास के स्पेस हटा दिए जाते हैं)।

आपके द्वारा इस कोड के संकलन के लिए 10 सेकंड प्रतीक्षा करने के चौंकाने वाले अनुभव को दूर करने के बाद :), आप वापस बैठ सकते हैं और आनंद ले सकते हैं।

// csvparser.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <string>

namespace qi = boost::spirit::qi;
namespace bascii = boost::spirit::ascii;

// Boost.Spirit Qi grammar that parses one CSV line into a vector of strings.
// The grammar uses ascii::space as its skipper; the field rules are wrapped
// in lexeme[], and the start rule is `field % ','`.
template <typename Iterator>
struct csv_parser : qi::grammar<Iterator, std::vector<std::string>(), 
    bascii::space_type>
{
    // Single-character rules used inside quoted fields (declared without a skipper).
    qi::rule<Iterator, char()                                           > COMMA;
    qi::rule<Iterator, char()                                           > DDQUOTE;
    // String-valued rules; all of them use the space skipper.
    qi::rule<Iterator, std::string(),               bascii::space_type  > non_escaped;
    qi::rule<Iterator, std::string(),               bascii::space_type  > escaped;
    qi::rule<Iterator, std::string(),               bascii::space_type  > field;
    qi::rule<Iterator, std::vector<std::string>(),  bascii::space_type  > start;

    csv_parser() : csv_parser::base_type(start)
    {
        using namespace qi;
        using qi::lit;
        using qi::lexeme;
        using bascii::char_;

        // A line is a list of fields separated by commas.
        start       = field % ',';
        // Try the quoted form first, then the bare form.
        field       = escaped | non_escaped;
        // Quoted field: between the enclosing quotes accept any character other
        // than '"' and ',', a literal comma (COMMA), or a doubled quote (DDQUOTE).
        escaped     = lexeme['"' >> *( char_ -(char_('"') | ',') | COMMA | DDQUOTE)  >> '"'];
        // Bare field: any run (possibly empty) of characters other than '"' and ','.
        non_escaped = lexeme[       *( char_ -(char_('"') | ',')                  )        ];
        // "" yields a single '"' character.
        DDQUOTE     = lit("\"\"")       [_val = '"'];
        // "," yields a ',' character.
        COMMA       = lit(",")          [_val = ','];
    }

};

// Interactive driver: reads lines from std::cin until an empty line or end
// of input, parses each one with csv_parser and prints the numbered fields,
// or "Parsing failed" when the line is rejected or not fully consumed.
int main()
{
    std::cout << "Enter CSV lines [empty] to quit\n";

    using bascii::space;
    typedef std::string::const_iterator iterator_type;
    typedef csv_parser<iterator_type> csv_parser;

    csv_parser grammar;
    std::string str;
    while (getline(std::cin, str) && !str.empty())
    {
        std::vector<std::string> csv;
        std::string::const_iterator it_beg = str.begin();
        std::string::const_iterator it_end = str.end();
        const bool parsed = phrase_parse(it_beg, it_end, grammar, space, csv);

        // Success also requires that the whole line was consumed.
        if (!parsed || it_beg != it_end)
        {
            std::cout << "Parsing failed\n";
            continue;
        }

        std::cout << "Parsing succeeded\n";
        int fid = 0;
        for (auto& field: csv)
        {
            ++fid;
            std::cout << "field " << fid << ": " << field << std::endl;
        }
    }

    return 0;
}

संकलित करें:

make csvparser

परीक्षण (उदाहरण विकिपीडिया से चुराया गया ):

./csvparser
Enter CSV lines [empty] to quit

1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
Parsing succeeded
field 1: 1999
field 2: Chevy
field 3: Venture "Extended Edition, Very Large"
field 4: 
field 5: 5000.00

1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00"
Parsing failed

— jav
स्रोत

2

यह समाधान इन 4 मामलों का पता लगाता है

पूरा क्लास है

https://github.com/pedro-vicente/csv-parser

1,field 2,field 3,
1,field 2,"field 3 quoted, with separator",
1,field 2,"field 3
with newline",
1,field 2,"field 3
with newline and separator,",

यह चरित्र द्वारा फ़ाइल वर्ण को पढ़ता है, और एक वेक्टर (स्ट्रिंग्स) के लिए एक समय में 1 पंक्ति पढ़ता है, इसलिए बहुत बड़ी फ़ाइलों के लिए उपयुक्त है।

उपयोग है

एक खाली पंक्ति (फ़ाइल का अंत) वापस आने तक Iterate करें। एक पंक्ति एक वेक्टर है जहां प्रत्येक प्रविष्टि एक सीएसवी कॉलम है।

read_csv_t csv;
csv.open("../test.csv");
std::vector<std::string> row;
while (true)
{
  row = csv.read_row();
  if (row.size() == 0)
  {
    break;
  }
}

वर्ग की घोषणा

// Row-at-a-time CSV reader backed by an input file stream.
class read_csv_t
{
public:
  read_csv_t();
  // Opens file_name for reading. NOTE(review): the meaning of the int result
  // is not visible here -- confirm against the implementation.
  int open(const std::string &file_name);
  // Reads and returns the next row, one string per column.
  std::vector<std::string> read_row();
private:
  std::ifstream m_ifs; // stream the rows are read from
};

कार्यान्वयन

// Reads the next row of the file character by character.
//
// Columns are separated by ','. A '"' toggles quote mode; inside quote mode
// separators and line ends are ordinary column text, and the quote
// characters themselves are not stored. A row ends at '\n', '\r' or "\r\n"
// outside quote mode, or at end of file.
//
// Returns the columns of the row. An empty vector is returned at end of
// file (the stream is closed then) and also for an empty line, so callers
// that stop on an empty row stop at the first blank line.
//
// Changes with respect to the earlier behaviour:
//  * text after the last separator of a line is kept as a column when it is
//    not empty (it used to be dropped; rows written with a trailing ','
//    still produce the same columns as before);
//  * "\r\n" is consumed as one line end instead of producing a spurious
//    empty row;
//  * a final line without a line terminator is returned instead of being
//    discarded.
std::vector<std::string> read_csv_t::read_row()
{
  bool quote_mode = false;
  std::vector<std::string> row;
  std::string column;
  char c;
  while (m_ifs.get(c))
  {
    switch (c)
    {
      // separator: column text in quote mode, otherwise it ends the column
    case ',':
      if (quote_mode)
      {
        column += c;
      }
      else
      {
        row.push_back(column);
        column.clear();
      }
      break;

      // quote: toggle quote mode
    case '"':
      quote_mode = !quote_mode;
      break;

      // line end: column text in quote mode, otherwise it ends the row
    case '\n':
    case '\r':
      if (quote_mode)
      {
        column += c;
        break;
      }
      // treat "\r\n" as a single terminator
      if (c == '\r' && m_ifs.peek() == '\n')
      {
        m_ifs.get();
      }
      if (!column.empty())
      {
        row.push_back(column);
      }
      return row;

      // anything else belongs to the current column
    default:
      column += c;
      break;
    }
  }

  // end of file: hand back whatever was collected for an unterminated last
  // line; the following call then returns an empty vector
  m_ifs.close();
  if (!column.empty())
  {
    row.push_back(column);
  }
  return row;
}

— पेड्रो विसेंट
स्रोत

1

आप Qtपुस्तकालय की क्षमताओं पर भी नज़र डाल सकते हैं ।

इसमें नियमित अभिव्यक्ति का समर्थन है और QString वर्ग के पास अच्छे तरीके हैं, उदाहरण के लिए split()QStringList को वापस करना, एक प्रदान किए गए सीमांकक के साथ मूल स्ट्रिंग को विभाजित करके प्राप्त स्ट्रिंग्स की सूची। सीएसवी फ़ाइल के लिए पर्याप्त होना चाहिए ..

किसी दिए गए हेडर नाम के साथ एक कॉलम प्राप्त करने के लिए मैं निम्नलिखित का उपयोग करता हूं: c ++ वंशानुक्रम Qt समस्या qstring

— मड
स्रोत

यह उद्धरणों में कॉमा को नहीं संभालेगा

— Ezee

1

यदि आप अपनी परियोजना में वृद्धि को शामिल नहीं करना चाहते हैं (यह बहुत बड़ा है यदि आप इसका उपयोग करने वाले हैं तो CSV पार्सिंग है ...)

मुझे यहाँ CSV के लिए भाग्य का साथ मिला है:

http://www.zedwood.com/article/112/cpp-csv-parser

यह उद्धृत फ़ील्ड्स को हैंडल करता है - लेकिन इनलाइन \ n वर्णों को संभालता नहीं है (जो कि ज्यादातर उपयोगों के लिए ठीक है)।

— NPike
स्रोत

1

संकलक को गैर-जरूरी सब कुछ नहीं करना चाहिए?

— टॉफटिम

1

यह एक पुराना धागा है, लेकिन फिर भी यह खोज परिणामों में सबसे ऊपर है, इसलिए मैं अपने समाधान को जोड़ रहा हूँ std :: stringstream और यवस ब्यूम्स द्वारा एक सरल स्ट्रिंग विधि की जगह जो मैंने यहाँ पाया।

निम्नलिखित उदाहरण लाइन द्वारा एक फ़ाइल लाइन पढ़ेंगे, // के साथ शुरू होने वाली टिप्पणी लाइनों को अनदेखा करें और अन्य लाइनों को तार, ints और युगल के संयोजन में पार्स करें। स्ट्रिंगस्ट्रीम पार्सिंग करता है, लेकिन व्हॉट्सएप द्वारा फ़ील्ड्स को सीमांकित किए जाने की उम्मीद करता है, इसलिए मैं पहले स्थानों में कॉमा को चालू करने के लिए स्ट्रिंगर का उपयोग करता हूं। यह टैब को ठीक से हैंडल करता है, लेकिन उद्धृत स्ट्रिंग्स के साथ सौदा नहीं करता है।

खराब या गायब इनपुट को केवल अनदेखा कर दिया जाता है, जो आपकी परिस्थिति के आधार पर अच्छा या नहीं हो सकता है।

#include <string>
#include <sstream>
#include <fstream>

// Replaces every occurrence of oldStr in str with newStr, in place.
// Text inserted by a replacement is not scanned again, so newStr may contain
// oldStr. An empty oldStr leaves str unchanged.
void StringReplace(std::string& str, const std::string& oldStr, const std::string& newStr)
// code by  Yves Baumes
// http://stackoverflow.com/questions/1494399/how-do-i-search-find-and-replace-in-a-standard-string
{
  // find("") matches at every position, which would make the loop below
  // insert newStr forever.
  if (oldStr.empty())
     return;

  size_t pos = 0;
  while((pos = str.find(oldStr, pos)) != std::string::npos)
  {
     str.replace(pos, oldStr.length(), newStr);
     pos += newStr.length();   // continue after the text just inserted
  }
}

// Reads filename line by line. Commas are turned into blanks so that stream
// extraction can split each line into: Field (string), Chan (string),
// ChanType (int), Scale (double), Import (int). Lines whose first field
// starts with "//" are treated as comments and skipped.
// A value that cannot be extracted from a line is not reported as an error.
void LoadCSV(std::string &filename) {
   std::ifstream stream(filename);
   std::string in_line;
   std::string Field;
   std::string Chan;
   // Initialised so that a first line with missing or malformed fields never
   // makes the code below read an indeterminate value.
   int ChanType = 0;
   double Scale = 0.0;
   int Import = 0;
   while (std::getline(stream, in_line)) {
      StringReplace(in_line, ",", " ");
      std::stringstream line(in_line);
      line >> Field >> Chan >> ChanType >> Scale >> Import;
      if (Field.substr(0,2)!="//") {
         // do your stuff 
         // this is CBuilder code for demonstration, sorry
         ShowMessage((String)Field.c_str() + "\n" + Chan.c_str() + "\n" + IntToStr(ChanType) + "\n" +FloatToStr(Scale) + "\n" +IntToStr(Import));
      }
   }
}

— marcp
स्रोत

1

इसके लायक क्या है, यहां मेरा कार्यान्वयन है। यह wstring इनपुट से संबंधित है, लेकिन इसे आसानी से स्ट्रिंग में समायोजित किया जा सकता है। यह फ़ील्ड में न्यूलाइन को हैंडल नहीं करता है (जैसा कि मेरा एप्लिकेशन या तो नहीं करता है, लेकिन इसके समर्थन को जोड़ना बहुत मुश्किल नहीं है) और यह RFC के अनुसार लाइन के "\ r \ n" अंत का अनुपालन नहीं करता है (यह मानते हुए कि आप उपयोग करते हैं :: getline), लेकिन यह व्हॉट्सएप ट्रिमिंग और डबल-कोट्स को सही तरीके से (उम्मीद से) हैंडल करता है।

using namespace std;

// Strips the characters listed in `whitespace` from both ends of str. When
// what remains both starts and ends with quotChar, those two quotes are
// removed and, scanning left to right, every quotChar found is erased with
// the search resuming one position later -- so a doubled quote collapses to
// a single one. A string made only of whitespace yields an empty string.
wstring trimquote(const wstring& str, const wstring& whitespace, const wchar_t quotChar)
{
    const wstring::size_type first = str.find_first_not_of(whitespace);
    if (first == wstring::npos)
        return L"";

    const wstring::size_type last = str.find_last_not_of(whitespace);
    const wstring::size_type length = last - first + 1;

    const bool quoted = (str[first] == quotChar) && (str[last] == quotChar);
    if (!quoted)
        return str.substr(first, length);

    // Drop the enclosing quotes, then un-escape the doubled ones.
    wstring inner = str.substr(first + 1, length - 2);
    for (wstring::size_type at = inner.find(quotChar, 0);
         at != wstring::npos;
         at = inner.find(quotChar, at + 1))
    {
        inner.erase(at, 1);
    }
    return inner;
}

// Finds the next quoted section of line at or after offset ofs.
// Returns (position of the opening quote, position of the closing quote).
// A doubled quote inside the section is an escaped quote and does not close
// it. A position that does not exist is reported as
// static_cast<unsigned>(wstring::npos): both members when there is no quote
// at all, only .second when the opening quote is never closed.
//
// The searches are done in wstring::size_type. Storing the result of find()
// in an unsigned and comparing that with wstring::npos never matches where
// size_type is wider than unsigned, which made an unterminated quote loop
// forever.
pair<unsigned, unsigned> nextCSVQuotePair(const wstring& line, const wchar_t quotChar, unsigned ofs = 0)
{
    const unsigned none = static_cast<unsigned>(wstring::npos);
    pair<unsigned, unsigned> r(none, none);

    const wstring::size_type open = line.find(quotChar, ofs);
    if(open == wstring::npos)
        return r;
    r.first = static_cast<unsigned>(open);

    wstring::size_type close = open;
    while((close = line.find(quotChar, close+1)) != wstring::npos)
    {
        // A quote directly followed by another quote is an escaped quote:
        // step over the pair and keep looking for the real closing quote.
        if((close+1 < line.size()) && (line[close+1] == quotChar))
        {
            close++;
            continue;
        }
        break;
    }
    if(close != wstring::npos)
        r.second = static_cast<unsigned>(close);
    return r;
}

// Splits one CSV line into fields.
// Fields are separated by ','; a comma inside a double-quoted section does
// not split. Each field is passed through trimquote(), which trims the
// listed whitespace characters and removes/un-escapes the quotes.
// `fields` is cleared first; the number of fields stored is returned.
//
// Positions are kept in wstring::size_type: an unsigned position compared
// with wstring::npos never matches where size_type is wider than unsigned,
// which made the search loop endless. The current quote pair is also moved
// forward once a delimiter lies behind it, so quoted commas in later fields
// are honoured, and an unterminated quote makes the rest of the line one
// field.
unsigned parseLine(vector<wstring>& fields, const wstring& line)
{
    const wchar_t delim = L',';
    const wstring whitespace = L" \t\xa0\x3000\x2000\x2001\x2002\x2003\x2004\x2005\x2006\x2007\x2008\x2009\x200a\x202f\x205f";
    const wchar_t quotChar = L'\"';
    // Value nextCSVQuotePair() uses for "no such position".
    const unsigned none = static_cast<unsigned>(wstring::npos);

    fields.clear();

    wstring::size_type ofs = 0;   // where the next delimiter search starts
    wstring::size_type ofs0 = 0;  // start of the field being collected
    wstring::size_type np;
    pair<unsigned, unsigned> quot = nextCSVQuotePair(line, quotChar);
    while((np = line.find(delim, ofs)) != wstring::npos)
    {
        // Skip quote pairs that end before this delimiter.
        while((quot.first != none) && (quot.second != none) && (np > quot.second))
            quot = nextCSVQuotePair(line, quotChar, quot.second+1);

        if((quot.first != none) && (np > quot.first))
        {
            // Unterminated quote: everything up to the end of the line
            // belongs to the current field.
            if(quot.second == none)
                break;
            // skip delimiter inside quoted field
            ofs = static_cast<wstring::size_type>(quot.second)+1;
            continue;
        }
        fields.push_back( trimquote(line.substr(ofs0, np-ofs0), whitespace, quotChar) );
        ofs = ofs0 = np+1;
    }
    fields.push_back( trimquote(line.substr(ofs0), whitespace, quotChar) );

    return static_cast<unsigned>(fields.size());
}

— फेबियन
स्रोत

1

यहां एक तैयार-टू-यूज़ फंक्शन है अगर आपको ज़रूरत है तो डबल्स की डेटा फ़ाइल (कोई पूर्णांक, कोई पाठ नहीं) लोड करने की।

#include <sstream>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>
#include <algorithm>

using namespace std;

/**
 * Parse a CSV data file and fill the 2d STL vector "data".
 * Limits: only "pure datas" of doubles, not encapsulated by " and without \n inside.
 * Further no formatting in the data (e.g. scientific notation)
 * It however handles both dots and commas as decimal separators and removes
 * the thousand separator ('.' when the decimal separator is not ".", ','
 * otherwise) -- unless that character is one of the field delimiters, in
 * which case it is left alone so that the fields can still be split.
 *
 * Every character of "delimiter" separates fields; consecutive delimiters
 * are treated as one, so empty fields are skipped. A token that cannot be
 * read as a number is stored as 0. Empty lines are ignored and a trailing
 * '\r' (CRLF files) is stripped.
 *
 * @param data         receives one vector of doubles per non-empty line
 * @param filename     path of the file to read
 * @param delimiter    set of field delimiter characters
 * @param decseparator decimal separator used in the file ("." or e.g. ",")
 *
 * returnCodes[0]: file access 0-> ok 1-> not able to read; 2-> decimal separator equal to comma separator
 * returnCodes[1]: number of records
 * returnCodes[2]: number of fields. -1 If rows have different field size
 * 
 */
vector<int>
readCsvData (vector <vector <double>>& data, const string& filename, const string& delimiter, const string& decseparator){

 vector<int> returnCodes(3, 0);

 string rowstring;
 int rowcount=0;
 vector<double>::size_type fieldcount=0;
 data.clear();

 ifstream iFile(filename, ios_base::in);
 if (!iFile.is_open()){
   returnCodes[0] = 1;
   return returnCodes;
 }
 while (getline(iFile, rowstring)) {
    // Strip the carriage return left over from a CRLF line ending.
    if (!rowstring.empty() && rowstring[rowstring.size()-1] == '\r')
      rowstring.erase(rowstring.size()-1);
    if (rowstring.empty()) continue; // empty line
    rowcount ++; //let's start with 1
    if(delimiter == decseparator){
      returnCodes[0] = 2;
      return returnCodes;
    }
    // The thousand separator is whichever of '.' / ',' is not the decimal
    // separator. It must not be removed when it is also a field delimiter
    // (e.g. delimiter "," with decimal separator "."), otherwise the whole
    // row collapses into a single number.
    const char thousands = (decseparator != ".") ? '.' : ',';
    if (delimiter.find(thousands) == string::npos){
      string::iterator end_pos = remove(rowstring.begin(), rowstring.end(), thousands);
      rowstring.erase(end_pos, rowstring.end());
    }
    if(decseparator != "." && !decseparator.empty()){
      // replace decimal separator with dots.
      replace(rowstring.begin(), rowstring.end(), decseparator[0], '.');
    }
    // tokenize..
    vector<double> tokens;
    // Skip delimiters at beginning.
    string::size_type lastPos = rowstring.find_first_not_of(delimiter, 0);
    // Find the delimiter that ends the first token.
    string::size_type pos     = rowstring.find_first_of(delimiter, lastPos);
    while (string::npos != pos || string::npos != lastPos){
        // Found a token, convert it to double and add it to the vector.
        const string stringtoken = rowstring.substr(lastPos, pos - lastPos);
        double doubletoken = 0.0;
        if (!stringtoken.empty()) {
          istringstream totalSString(stringtoken);
          // A failed conversion must not reuse the previous token's value.
          if (!(totalSString >> doubletoken))
            doubletoken = 0.0;
        }
        tokens.push_back(doubletoken);
        // Skip delimiters.  Note the "not_of"
        lastPos = rowstring.find_first_not_of(delimiter, pos);
        // Find the delimiter that ends the next token.
        pos = rowstring.find_first_of(delimiter, lastPos);
    }
    if(rowcount == 1){
      fieldcount = tokens.size();
      returnCodes[2] = static_cast<int>(tokens.size());
    } else if (tokens.size() != fieldcount){
      returnCodes[2] = -1;
    }
    data.push_back(tokens);
 }
 iFile.close();
 returnCodes[1] = rowcount;
 return returnCodes;
}

— Antonello
स्रोत

1

एक और त्वरित और आसान तरीका है Boost.Fusion I/O:

#include <iostream>
#include <sstream>

#include <boost/fusion/adapted/boost_tuple.hpp>
#include <boost/fusion/sequence/io.hpp>

namespace fusion = boost::fusion;

// String wrapper whose stream extraction stops at a CSV delimiter instead of
// at whitespace.
struct CsvString
{
    std::string value;

    // Reads characters into v.value until the next character is ',', '\n' or
    // end of input. The terminating character is only peeked at, so it stays
    // in the stream. Any previous content of v.value is discarded.
    friend std::istream& operator>>(std::istream& s, CsvString& v) {
        typedef std::istream::traits_type traits;

        v.value.clear();
        while(true) {
            const traits::int_type next = s.peek();
            const bool at_end = traits::eq_int_type(next, traits::eof());
            if(at_end || next == ',' || next == '\n')
                break;
            v.value.push_back(traits::to_char_type(next));
            s.get();
        }
        return s;
    }

    // Writes the wrapped string unchanged.
    friend std::ostream& operator<<(std::ostream& s, CsvString const& v) {
        s << v.value;
        return s;
    }
};

// Example: parse two CSV records from an in-memory stream into boost::tuple
// rows using Boost.Fusion sequence I/O and echo each row to std::cout.
int main() {
    std::stringstream input("abc,123,true,3.14\n"
                            "def,456,false,2.718\n");

    // One record: a CsvString followed by an int, a bool and a double.
    typedef boost::tuple<CsvString, int, bool, double> CsvRow;

    using fusion::operator<<;
    std::cout << std::boolalpha;

    using fusion::operator>>;
    // Read booleans as the words "true"/"false".
    input >> std::boolalpha;
    // A row has no opening text, is closed by a newline and its elements are
    // separated by commas.
    input >> fusion::tuple_open("") >> fusion::tuple_close("\n") >> fusion::tuple_delimiter(',');

    // Loop until extraction of a row fails.
    for(CsvRow row; input >> row;)
        std::cout << row << '\n';
}

आउटपुट:

(abc 123 true 3.14)
(def 456 false 2.718)

— मैक्सिम इगोरुशिन
स्रोत

1

मैंने CSV फ़ाइलों को पार्स करने का एक अच्छा तरीका लिखा और मुझे लगा कि मुझे इसे उत्तर के रूप में जोड़ना चाहिए:

#include <algorithm>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <stdio.h>

// Values extracted from one data row of the CSV file by parseCSVFile().
struct CSVDict
{
  std::vector< std::string > inputImages; // cells of the selected input columns, as text
  std::vector< double > inputLabels;      // cells of the selected label columns, converted with atof
};

/**
\brief Splits the string

Only the first character of \p delim is used as the separator. Empty fields
are preserved, including one that follows a trailing separator ("a,b," gives
"a", "b", ""), so every row of a CSV file yields one entry per column. An
empty input string yields an empty vector.

\param str String to split
\param delim Delimiter on the basis of which splitting is to be done
\return results Output in the form of vector of strings
*/
std::vector<std::string> stringSplit( const std::string &str, const std::string &delim )
{
  std::vector<std::string> results;
  if (str.empty())
  {
    return results;
  }

  // An empty delimiter string behaves like its terminating '\0'.
  const char separator = delim.empty() ? '\0' : delim[0];

  std::string::size_type start = 0;
  for (;;)
  {
    const std::string::size_type stop = str.find(separator, start);
    if (stop == std::string::npos)
    {
      // Last field: everything after the final separator (possibly empty).
      results.push_back(str.substr(start));
      break;
    }
    results.push_back(str.substr(start, stop - start));
    start = stop + 1;
  }

  return results;
}

/**
\brief Parse the supplied CSV File and extract the requested columns of every data row.

Assumptions:
1. Header information is in first row
2. Delimiters are only used to differentiate cell members

All double quotes are removed from each line before it is split. A data row
that has fewer cells than a requested column index contributes an empty
string (for input columns) or 0.0 (for label columns) for that column.

\param csvFileName The full path of the file to parse
\param inputColumns The string of input columns which contain the data to be used for further processing
\param inputLabels The string of input labels based on which further processing is to be done
\param delim The delimiters used in inputColumns and inputLabels
\return One CSVDict per data row (the header row is not included)
*/
std::vector< CSVDict > parseCSVFile( const std::string &csvFileName, const std::string &inputColumns, const std::string &inputLabels, const std::string &delim )
{
  std::vector< CSVDict > return_CSVDict;
  const std::vector< std::string > inputColumnsVec = stringSplit(inputColumns, delim);
  const std::vector< std::string > inputLabelsVec = stringSplit(inputLabels, delim);
  std::ifstream inFile(csvFileName.c_str());
  bool headerSeen = false;
  std::vector< size_t > inputColumnIndeces, inputLabelIndeces;
  for (std::string line; std::getline(inFile, line, '\n');)
  {
    line.erase(std::remove(line.begin(), line.end(), '"'), line.end());
    const std::vector< std::string > rowVec = stringSplit(line, delim);

    // for the first row, record the indeces of the inputColumns and inputLabels
    if (!headerSeen)
    {
      headerSeen = true;
      for (size_t i = 0; i < rowVec.size(); i++)
      {
        for (size_t j = 0; j < inputColumnsVec.size(); j++)
        {
          if (rowVec[i] == inputColumnsVec[j])
          {
            inputColumnIndeces.push_back(i);
          }
        }
        for (size_t j = 0; j < inputLabelsVec.size(); j++)
        {
          if (rowVec[i] == inputLabelsVec[j])
          {
            inputLabelIndeces.push_back(i);
          }
        }
      }
      continue;
    }

    CSVDict tempDict;
    for (size_t i = 0; i < inputColumnIndeces.size(); i++)
    {
      // A short row must not be indexed past its end.
      const size_t column = inputColumnIndeces[i];
      tempDict.inputImages.push_back(column < rowVec.size() ? rowVec[column] : std::string());
    }
    for (size_t i = 0; i < inputLabelIndeces.size(); i++)
    {
      const size_t column = inputLabelIndeces[i];
      tempDict.inputLabels.push_back(column < rowVec.size() ? std::atof(rowVec[column].c_str()) : 0.0);
    }
    return_CSVDict.push_back(tempDict);
  }

  return return_CSVDict;
}

— scap3y
स्रोत

1

इसका उपयोग संभव है std::regex।

आपकी फ़ाइल के आकार और आपके पास उपलब्ध मेमोरी के आधार पर, यह संभव है कि इसे या तो लाइन से लाइन या पूरी तरह से एक में पढ़ा जाए std::string।

फ़ाइल को पढ़ने के लिए कोई भी उपयोग कर सकता है:

// Read the entire file into one string through stream-buffer iterators.
// The extra parentheses around the first argument keep the declaration from
// being parsed as a function declaration.
std::ifstream t("file.txt");
std::string sin((std::istreambuf_iterator<char>(t)),
                 std::istreambuf_iterator<char>());

तब आप इस के साथ मेल कर सकते हैं जो वास्तव में आपकी आवश्यकताओं के लिए अनुकूलन योग्य है।

std::regex word_regex(",\\s]+");
auto what = 
    std::sregex_iterator(sin.begin(), sin.end(), word_regex);
auto wend = std::sregex_iterator();

std::vector<std::string> v;
for (;what!=wend ; wend) {
    std::smatch match = *what;
    v.push_back(match.str());
}

— g24l
स्रोत

1

चूँकि मैं अभी boost का अभ्यस्त नहीं हूँ, इसलिए मैं एक अधिक सरल समाधान सुझाऊँगा। मान लें कि आपकी .csv फ़ाइल में 100 पंक्तियाँ हैं और प्रत्येक पंक्ति में ',' द्वारा अलग की गई 10 संख्याएँ हैं। आप इस डेटा को निम्न कोड के साथ एक ऐरे के रूप में लोड कर सकते हैं:

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
using namespace std;

// Loads up to 100 lines of up to 10 comma-separated integers each from
// "name_of_file.csv" into the array A. Cells that are missing from the file
// stay 0; values beyond the tenth of a line and lines beyond the hundredth
// are ignored. Returns 1 when the file cannot be opened.
int main()
{
    const int kRows = 100;
    const int kCols = 10;
    int A[kRows][kCols] = {};   // zero-initialised

    ifstream ifs;
    ifs.open("name_of_file.csv");
    if(!ifs)
    {
        cerr << "Cannot open name_of_file.csv" << endl;
        return 1;
    }

    string s1;
    // Stop at the end of the array or of the file, whichever comes first.
    for(int k=0; k<kRows && getline(ifs,s1); k++)
    {
        stringstream stream(s1);
        char c;
        // The column index is bounded so that a line with more than kCols
        // values cannot write past the end of the row.
        for(int j=0; j<kCols && (stream >> A[k][j]); j++)
        {
            stream >> c;        // consume the ',' that follows the number
        }
    }

    return 0;
}

— nikos_k
स्रोत