Help with tokenize an input text file
CodeGuru Home VC++ / MFC / C++ .NET / C# Visual Basic VB Forums Developer.com
Results 1 to 5 of 5

Thread: Help with tokenize an input text file

  1. #1
    Join Date
    Feb 2010
    Posts
    2

    Help with tokenize an input text file

    Hi, I need to tokenize my input text file and then categorize each word as a noun, verb, adjective, or adverb. Here is my code. Can anyone help me?



    /**
     * Main window of the Malay grammar checker.
     *
     * <p>The window shows an editable text area for the input text, a read-only
     * area listing the tokens found in that text, a row of action buttons and a
     * status label. Text can be loaded from and saved to a file through the
     * "File" menu. Tokenizing splits the input on the characters in
     * {@link #delim} and counts how often each lower-cased word occurs.
     *
     * <p>Tagging and grammar checking are not implemented; their menu items and
     * buttons are present but perform no action.
     */
    public class Grammar extends JFrame implements ActionListener
    {
        final static boolean shouldFill = true;
        final static boolean shouldWeightX = true;
        final static boolean RIGHT_TO_LEFT = false;

        private JMenuItem jmiSave, jmiOpen, jmiExit, jmiAbout, jmiToken, jmiTagger, jmiCheck;
        private JButton token = new JButton ("Tokenize Sentence");
        private JButton tagger = new JButton ("Tagger Word");
        private JButton check = new JButton ("Check Grammar");
        private JButton clear = new JButton ("Clear Text");

        private JTextArea jtaOpen = new JTextArea();
        private JTextArea jtaToken = new JTextArea();
        private JTextArea jtaError = new JTextArea();

        private JLabel jlblStatus = new JLabel();
        private JPanel jpc = new JPanel();
        private JFileChooser jFileChooser = new JFileChooser();

        /** Word -> number of occurrences in the input text, sorted by word. */
        private Map<String, Integer> map = new TreeMap<String, Integer>();

        /** Characters that separate words: whitespace, punctuation and digits. */
        private String delim = " \t\n.,:;?!-/()[]\"\'1234567890 ";


        /**
         * Builds the window: menu bar, the two text panes, the button row and
         * the status label.
         */
        public Grammar()
        {
            setTitle("Malay Grammar Checker");
            setSize(1000,700);

            buildMenuBar();

            jFileChooser.setCurrentDirectory(new File("."));

            // Install the grid layout BEFORE adding children. The original code
            // added children with BorderLayout constraints and replaced the
            // layout afterwards, which silently discarded those constraints.
            Container c = getContentPane();
            c.setLayout(new GridLayout(2,2,2,2));

            jtaOpen.setLineWrap(true);
            jtaOpen.setWrapStyleWord(true);
            jtaOpen.setEditable(true);
            JPanel p1 = new JPanel(new BorderLayout());
            p1.add(createScrollPane(jtaOpen, "Display Text", 500, 500), BorderLayout.WEST);
            c.add(p1);

            jtaToken.setEditable(false);
            JPanel p2 = new JPanel(new BorderLayout());
            p2.add(createScrollPane(jtaToken, "Tokenize the Sentences", 300, 500), BorderLayout.EAST);
            c.add(p2);

            c.add(buildButtonPanel());

            // The status label receives the open/save messages; it has to be
            // part of the window for those messages to be visible.
            c.add(jlblStatus);
        }


        /** Creates the File / Operation / Help menus and registers this frame as their listener. */
        private void buildMenuBar()
        {
            JMenuBar menuBar = new JMenuBar();
            setJMenuBar(menuBar);

            JMenu fileMenu = new JMenu("File");
            JMenu operationMenu = new JMenu ("Operation");
            JMenu helpMenu = new JMenu("Help");
            fileMenu.setMnemonic('F');
            operationMenu.setMnemonic('O');
            helpMenu.setMnemonic('H');
            menuBar.add(fileMenu);
            menuBar.add(operationMenu);
            menuBar.add(helpMenu);

            // 'S' (not 'N'): the mnemonic should be a letter of the label.
            fileMenu.add(jmiSave = new JMenuItem("Save",'S'));
            fileMenu.add(jmiOpen = new JMenuItem("Open",'O'));
            fileMenu.add(jmiExit = new JMenuItem("Exit",'E'));
            operationMenu.add(jmiToken = new JMenuItem ("Tokenize", 'T'));
            operationMenu.add(jmiTagger = new JMenuItem ("Tagger", 'G'));
            operationMenu.add(jmiCheck = new JMenuItem ("Check Grammar", 'C'));
            helpMenu.add(jmiAbout = new JMenuItem("About Grammar Checker",'A'));

            jmiSave.addActionListener(this);
            jmiOpen.addActionListener(this);
            jmiExit.addActionListener(this);
            jmiToken.addActionListener(this);
            jmiTagger.addActionListener(this);
            jmiCheck.addActionListener(this);
            jmiAbout.addActionListener(this);
        }


        /**
         * Wraps a text area in a scroll pane with an always-visible vertical
         * scroll bar and a titled, padded border.
         *
         * @param area   the text area to wrap
         * @param title  the caption shown in the border
         * @param width  preferred width in pixels
         * @param height preferred height in pixels
         * @return the configured scroll pane
         */
        private static JScrollPane createScrollPane(JTextArea area, String title, int width, int height)
        {
            JScrollPane pane = new JScrollPane(area);
            pane.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
            pane.setPreferredSize(new Dimension(width, height));
            pane.setBorder(BorderFactory.createCompoundBorder(
                    BorderFactory.createCompoundBorder(
                            BorderFactory.createTitledBorder(title),
                            BorderFactory.createEmptyBorder(8, 8, 8, 8)),
                    pane.getBorder()));
            return pane;
        }


        /**
         * Creates the button row. Each button gets its own tooltip and its own
         * listener (the original attached all of them to the "clear" button).
         */
        private JPanel buildButtonPanel()
        {
            JPanel p3 = new JPanel();

            p3.add(token);
            token.setToolTipText("Tokenize the sentence");
            token.addActionListener(new ActionListener()
            {
                public void actionPerformed(ActionEvent e)
                {
                    tokenize();
                }
            });

            p3.add(tagger);
            tagger.setToolTipText("Tag every word");
            tagger.addActionListener(new ActionListener()
            {
                public void actionPerformed(ActionEvent e)
                {
                    // Tagging is not implemented yet.
                }
            });

            p3.add(check);
            check.setToolTipText("Check The Grammar");
            check.addActionListener(new ActionListener()
            {
                public void actionPerformed(ActionEvent e)
                {
                    // Grammar checking is not implemented yet.
                }
            });

            p3.add(clear);
            clear.setToolTipText("Clear open text");
            clear.addActionListener(new ActionListener()
            {
                public void actionPerformed(ActionEvent e)
                {
                    jtaOpen.setText("");
                }
            });

            return p3;
        }


        /**
         * Handles the menu items: Save, Open, Exit, Tokenize and About.
         * The Tagger and Check Grammar items currently do nothing.
         *
         * @param e the event fired by a menu item
         */
        @Override
        public void actionPerformed(ActionEvent e)
        {
            String actionCommand = e.getActionCommand();

            if (e.getSource() instanceof JMenuItem)
            {
                if ("Save".equals(actionCommand))
                {
                    saveFile();
                }
                else if ("Open".equals(actionCommand))
                {
                    open();
                }
                else if ("Exit".equals(actionCommand))
                {
                    System.exit(0);
                }
                else if ("Tokenize".equals(actionCommand))
                {
                    tokenize();
                }
            }

            if (jmiAbout == e.getSource())
            {
                JOptionPane.showMessageDialog(Grammar.this,
                        "Malay Grammar Checker \n\nDeveloped by : Zazarina Bt Mustaffar \n" +
                        "Faculty of Computer Science & Information Technology\n\nUniversiti Malaysia Sarawak \n" +
                        "email: pocong_bacin17@yahoo.com ",
                        "About", JOptionPane.PLAIN_MESSAGE);
            }
        }


        /** Asks the user for a file, loads it into the input area and recounts the words. */
        private void open()
        {
            if (jFileChooser.showOpenDialog(this) == JFileChooser.APPROVE_OPTION)
            {
                jtaOpen.setText("");
                open(jFileChooser.getSelectedFile());
                createMap();
            }
        }


        /**
         * Appends the whole content of {@code file} to the input area, decoding
         * the bytes with the platform default charset, and reports the outcome
         * in the status label.
         *
         * @param file the file to read
         */
        private void open(File file)
        {
            BufferedInputStream in = null;
            try
            {
                in = new BufferedInputStream(new FileInputStream(file));

                // Read until end of stream. available() is only an estimate of
                // what can be read without blocking, and a single read() may
                // return fewer bytes than requested, so neither can be relied
                // on to deliver the complete file.
                java.io.ByteArrayOutputStream contents = new java.io.ByteArrayOutputStream();
                byte[] chunk = new byte[8192];
                int count;
                while ((count = in.read(chunk)) != -1)
                {
                    contents.write(chunk, 0, count);
                }
                jtaOpen.append(contents.toString());

                jlblStatus.setText(file.getName() + " Opened");
            }
            catch (IOException ex)
            {
                jlblStatus.setText("Error opening " + file.getName());
            }
            finally
            {
                closeQuietly(in);
            }
        }


        /** Asks the user for a target file and saves the input area to it. */
        private void saveFile()
        {
            if (jFileChooser.showSaveDialog(this) == JFileChooser.APPROVE_OPTION)
            {
                saveFile(jFileChooser.getSelectedFile());
            }
        }


        /**
         * Writes the text of the input area to {@code file}, encoded with the
         * platform default charset, and reports the outcome in the status label.
         *
         * @param file the file to write
         */
        private void saveFile(File file)
        {
            BufferedOutputStream out = null;
            try
            {
                out = new BufferedOutputStream(new FileOutputStream(file));
                byte[] b = (jtaOpen.getText()).getBytes();
                out.write(b, 0, b.length);
                // Flush inside the try block so a failed write is reported as
                // an error instead of being hidden by the quiet close below.
                out.flush();

                jlblStatus.setText(file.getName() + " Saved");
            }
            catch (IOException ex)
            {
                jlblStatus.setText("Error saving " + file.getName());
            }
            finally
            {
                closeQuietly(out);
            }
        }


        /** Closes {@code resource} if it is non-null, ignoring any failure of the close itself. */
        private static void closeQuietly(java.io.Closeable resource)
        {
            if (resource != null)
            {
                try
                {
                    resource.close();
                }
                catch (IOException ignored)
                {
                    // Nothing useful can be done when close() fails.
                }
            }
        }


        /** Recounts the words of the input area into {@link #map}. */
        private void createMap()
        {
            map = countWords(jtaOpen.getText(), delim);
        }


        /** Recounts the words and lists each word with its count in the token area. */
        private void tokenize()
        {
            createMap();

            StringBuilder listing = new StringBuilder();
            for (Map.Entry<String, Integer> entry : map.entrySet())
            {
                listing.append(entry.getKey()).append(" : ").append(entry.getValue()).append('\n');
            }
            jtaToken.setText(listing.toString());
        }


        /**
         * Splits {@code text} on any character of {@code delimiters} and counts
         * the occurrences of each word. Words are lower-cased and trimmed before
         * counting; tokens that are empty after trimming are skipped.
         *
         * @param text       the text to tokenize; {@code null} is treated as empty
         * @param delimiters the characters that separate words
         * @return a new map from word to occurrence count, sorted by word
         */
        static Map<String, Integer> countWords(String text, String delimiters)
        {
            Map<String, Integer> counts = new TreeMap<String, Integer>();
            if (text == null)
            {
                return counts;
            }

            StringTokenizer tokenizer = new StringTokenizer(text, delimiters);
            while (tokenizer.hasMoreTokens())
            {
                // trim() drops stray control characters such as the '\r' of a
                // Windows line ending, which is not part of the delimiter set.
                String word = tokenizer.nextToken().toLowerCase().trim();
                if (word.equals(""))
                {
                    continue;
                }

                Integer previous = counts.get(word);
                int updated = (previous == null) ? 1 : previous.intValue() + 1;
                counts.put(word, Integer.valueOf(updated));
            }
            return counts;
        }


        /**
         * Starts the application. The window is created and shown on the Swing
         * event dispatch thread.
         *
         * @param args ignored
         */
        public static void main(String[] args) throws Exception
        {
            javax.swing.SwingUtilities.invokeLater(new Runnable()
            {
                public void run()
                {
                    Grammar g = new Grammar();
                    g.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
                    g.setVisible(true);
                }
            });
        }

    }

  2. #2
    dlorde is offline Elite Member Power Poster
    Join Date
    Aug 1999
    Location
    UK
    Posts
    10,163

    Re: Help with tokenize an input text file

    Please use [CODE]...[/CODE] tags when posting code, so it stays readable.

    What, specifically, are you stuck on?

    Experience is a poor teacher: it gives its tests before it teaches its lessons...
    Anon.
    Please use [CODE]...your code here...[/CODE] tags when posting code. If you get an error, please post the full error message and stack trace, if present.

  3. #3
    Join Date
    Feb 2010
    Posts
    2

    Re: Help with tokenize an input text file

    Sorry for the unreadable code; this is my first time posting on a forum. There are two things I need to do:

    (1) tokenize the word in jtaOpen and
    (2)categorize each word into its specification (noun, verb, adjective or adverb).

    The problem is that, to categorize the words, I need to use WordNet, but I'm working with Malay words and no WordNet exists for Malay. What should I do?

  4. #4
    Join Date
    May 2002
    Location
    Lindenhurst, NY
    Posts
    867

    Re: Help with tokenize an input text file

    Quote Originally Posted by miss zaza View Post
    What shall i do?
    Figure out the code tags. If you accomplish this, continue. If you cannot accomplish this, quit (your programming classes, because if that's too difficult....).

  5. #5
    dlorde is offline Elite Member Power Poster
    Join Date
    Aug 1999
    Location
    UK
    Posts
    10,163

    Re: Help with tokenize an input text file

    Quote Originally Posted by miss zaza View Post
    (1) tokenize the word in jtaOpen
    Sun have designated StringTokenizer a 'legacy' class (sort of half-way to being deprecated, I guess). They suggest using String.split(..) or the regex package classes. But anyway, what's the problem tokenizing the input string?

    (2)categorize each word into its specification (noun, verb, adjective or adverb).

    The problem is when i want to categorize the words, i need to used WordNet. Since i'm using Malay words, there's no WordNet exist for Malay. What shall i do?
    I don't know - your statement is paradoxical. If you believe you need to use WordNet and WordNet doesn't meet your requirements, you need to rethink either what you believe you need, or your requirements.

    If you don't think carefully, you might believe that programming is just typing statements in a programming language...
    W. Cunningham
    Please use [CODE]...your code here...[/CODE] tags when posting code. If you get an error, please post the full error message and stack trace, if present.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  


Windows Mobile Development Center


Click Here to Expand Forum to Full Width

This is a CodeGuru survey question.


Featured


HTML5 Development Center