BioloMICS logo
×
BioloMICS menu

Get Genbank Sequences

 
The code sample below shows how to retrieve sequences from GenBank by accession number.
 
Imports System
Imports System.Windows.Forms
Imports System.Drawing
Imports System.Threading
Imports System.Collections
Imports System.Diagnostics
Imports BioloMICS.BioCallback
Imports BioloMICS.SharedClasses
Imports System.Text
Imports System.Xml
 
''' <summary>
''' Small search form: the user types an accession number, the form queries the
''' NCBI EFetch web service for it and lists every returned record in a grid.
''' Double-clicking a result copies the EFetch URL of that record to the clipboard
''' and closes the form.
''' </summary>
Public Class MainForm
     Inherits System.Windows.Forms.Form

     ' Base URL of the EFetch web service; the (URL-escaped) accession number is appended to it.
     ' HTTPS is used because NCBI serves the E-utilities over HTTPS.
     Private Const EFetchBaseUrl As String = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id="

     ''' <summary>Builds the controls and keeps the form above other windows.</summary>
     Public Sub New()
          InitializeComponent()
          Me.TopMost = True
     End Sub

     ''' <summary>
     ''' Creates and lays out the controls (search box, search button, title label and
     ''' the results grid with its five read-only text columns) and wires the
     ''' Click / CellDoubleClick handlers.
     ''' </summary>
     Sub InitializeComponent()
          Me.btnSearch = New System.Windows.Forms.Button()
          Me.txtGBNumberToSearch = New System.Windows.Forms.TextBox()
          Me.lblTitle = New System.Windows.Forms.Label()
          Me.dgvListOfResults = New System.Windows.Forms.DataGridView()
          Me.Organism = New System.Windows.Forms.DataGridViewTextBoxColumn()
          Me.Namer = New System.Windows.Forms.DataGridViewTextBoxColumn()
          Me.MB = New System.Windows.Forms.DataGridViewTextBoxColumn()
          Me.Status = New System.Windows.Forms.DataGridViewTextBoxColumn()
          Me.Year = New System.Windows.Forms.DataGridViewTextBoxColumn()
          CType(Me.dgvListOfResults,System.ComponentModel.ISupportInitialize).BeginInit
          Me.SuspendLayout
          '
          'btnSearch
          '
          Me.btnSearch.Location = New System.Drawing.Point(197, 28)
          Me.btnSearch.Name = "btnSearch"
          Me.btnSearch.Size = New System.Drawing.Size(75, 23)
          Me.btnSearch.TabIndex = 0
          Me.btnSearch.Text = "Search"
          Me.btnSearch.UseCompatibleTextRendering = true
          Me.btnSearch.UseVisualStyleBackColor = true
          AddHandler Me.btnSearch.Click, AddressOf Me.BtnSearchClick
          '
          'txtGBNumberToSearch
          '
          Me.txtGBNumberToSearch.Location = New System.Drawing.Point(12, 31)
          Me.txtGBNumberToSearch.Name = "txtGBNumberToSearch"
          Me.txtGBNumberToSearch.Size = New System.Drawing.Size(178, 20)
          Me.txtGBNumberToSearch.TabIndex = 1
          Me.txtGBNumberToSearch.Text = "AF123456"
          '
          'lblTitle
          '
          Me.lblTitle.Location = New System.Drawing.Point(11, 15)
          Me.lblTitle.Name = "lblTitle"
          Me.lblTitle.Size = New System.Drawing.Size(178, 23)
          Me.lblTitle.TabIndex = 2
          Me.lblTitle.Text = "Accession number to search for:"
          Me.lblTitle.UseCompatibleTextRendering = true
          '
          'dgvListOfResults
          '
          Me.dgvListOfResults.AllowUserToAddRows = false
          Me.dgvListOfResults.AllowUserToDeleteRows = false
          Me.dgvListOfResults.Anchor = CType((((System.Windows.Forms.AnchorStyles.Top Or System.Windows.Forms.AnchorStyles.Bottom)  _
                              Or System.Windows.Forms.AnchorStyles.Left)  _
                              Or System.Windows.Forms.AnchorStyles.Right),System.Windows.Forms.AnchorStyles)
          Me.dgvListOfResults.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize
          Me.dgvListOfResults.Columns.AddRange(New System.Windows.Forms.DataGridViewColumn() {Me.Organism, Me.Namer, Me.MB, Me.Status, Me.Year})
          Me.dgvListOfResults.Location = New System.Drawing.Point(12, 57)
          Me.dgvListOfResults.Name = "dgvListOfResults"
          Me.dgvListOfResults.ReadOnly = true
          Me.dgvListOfResults.Size = New System.Drawing.Size(701, 395)
          Me.dgvListOfResults.TabIndex = 3
          AddHandler Me.dgvListOfResults.CellDoubleClick, AddressOf Me.DgvListOfResultsDoubleClick
          '
          'Organism
          '
          Me.Organism.HeaderText = "Organism"
          Me.Organism.Name = "Organism"
          Me.Organism.ReadOnly = true
          Me.Organism.Width = 150
          '
          'Namer (shown as "Accession number version")
          '
          Me.Namer.HeaderText = "Accession number version"
          Me.Namer.Name = "Namer"
          Me.Namer.ReadOnly = true
          '
          'MB (shown as "Accession number"; filled with the GBSeq_locus value)
          '
          Me.MB.HeaderText = "Accession number"
          Me.MB.Name = "MB"
          Me.MB.ReadOnly = true
          '
          'Status (shown as "Sequence")
          '
          Me.Status.HeaderText = "Sequence"
          Me.Status.MaxInputLength = 1000000
          Me.Status.Name = "Status"
          Me.Status.ReadOnly = true
          Me.Status.Width = 200
          '
          'Year (filled with the GBSeq_update-date value)
          '
          Me.Year.HeaderText = "Year"
          Me.Year.Name = "Year"
          Me.Year.ReadOnly = true
          '
          'MainForm
          '
          Me.AcceptButton = Me.btnSearch
          Me.ClientSize = New System.Drawing.Size(725, 464)
          Me.Controls.Add(Me.dgvListOfResults)
          Me.Controls.Add(Me.txtGBNumberToSearch)
          Me.Controls.Add(Me.btnSearch)
          Me.Controls.Add(Me.lblTitle)
          Me.Name = "MainForm"
          Me.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen
          Me.Text = "MainForm"
          CType(Me.dgvListOfResults,System.ComponentModel.ISupportInitialize).EndInit
          Me.ResumeLayout(false)
          Me.PerformLayout
     End Sub
     Private Organism As System.Windows.Forms.DataGridViewTextBoxColumn
     Private txtGBNumberToSearch As System.Windows.Forms.TextBox
     Private Year As System.Windows.Forms.DataGridViewTextBoxColumn
     Private Status As System.Windows.Forms.DataGridViewTextBoxColumn
     Private Namer As System.Windows.Forms.DataGridViewTextBoxColumn
     Private dgvListOfResults As System.Windows.Forms.DataGridView
     Private MB As System.Windows.Forms.DataGridViewTextBoxColumn
     Private lblTitle As System.Windows.Forms.Label
     Private btnSearch As System.Windows.Forms.Button

     ''' <summary>Entry point: shows the search form modally and disposes it afterwards.</summary>
     <STAThread()> _
     Public Shared Sub Main()
          ' A form shown with ShowDialog is not disposed automatically when it closes,
          ' so the Using block releases its window handle and controls.
          Using frm As New MainForm()
               frm.ShowDialog()
          End Using
     End Sub

     ''' <summary>
     ''' Clears the results grid, downloads the record(s) for the accession number typed in
     ''' the search box from the EFetch web service as XML, and adds one grid row per
     ''' sequence found. Does nothing more than clearing the grid when the search box is blank.
     ''' </summary>
     ''' <exception cref="System.Net.WebException">
     ''' The request failed or the service answered with a status code other than 200.
     ''' </exception>
     ''' <exception cref="System.Xml.XmlException">The response is not well-formed XML.</exception>
     Public Sub GenbankQuery()
          ' Clear the results grid
          dgvListOfResults.Rows.Clear()

          Dim accession As String = txtGBNumberToSearch.Text.Trim()
          If accession.Length = 0 Then
               ' Nothing to search for; avoid sending a request with an empty id
               Return
          End If

          ' Escape the user input so that characters such as '&' or spaces cannot alter the query string
          Dim url As String = EFetchBaseUrl & Uri.EscapeDataString(accession) & "&retmode=XML"

          ' Create a WebRequest to the remote site (webservice of Genbank)
          Dim request As System.Net.HttpWebRequest = DirectCast(System.Net.WebRequest.Create(url), System.Net.HttpWebRequest)

          ' NB! Use the following line ONLY if the website is protected
          'request.Credentials = New System.Net.NetworkCredential("username", "password")

          ' Call the remote site; the Using block closes the connection even when parsing fails
          Using response As System.Net.HttpWebResponse = DirectCast(request.GetResponse(), System.Net.HttpWebResponse)
               ' Check if the response is OK (status code 200)
               If response.StatusCode <> System.Net.HttpStatusCode.OK Then
                    Throw New System.Net.WebException("Could not retrieve document from the URL, response code: " & CInt(response.StatusCode).ToString())
               End If

               Dim document As New XmlDocument()
               ' Do not download external DTDs/entities referenced by the response
               document.XmlResolver = Nothing

               ' Load the XML straight from the response stream (encoding is taken from the XML itself)
               Using stream As System.IO.Stream = response.GetResponseStream()
                    document.Load(stream)
               End Using

               AddRecordsToGrid(document)
          End Using
     End Sub

     ''' <summary>
     ''' Walks the XML document in document order, remembers the descriptive fields of the
     ''' current record and adds a grid row each time a GBSeq_sequence element is reached.
     ''' </summary>
     Private Sub AddRecordsToGrid(document As XmlDocument)
          Dim accessionVersion As String = ""
          Dim locusName As String = ""
          Dim pubYear As String = ""
          Dim organismName As String = ""

          Using nodeReader As New XmlNodeReader(document)
               While nodeReader.Read()
                    ' Only element nodes carry the values we are interested in
                    If nodeReader.NodeType <> XmlNodeType.Element Then
                         Continue While
                    End If

                    Select Case nodeReader.Name
                         Case "GBSeq_accession-version"
                              accessionVersion = nodeReader.ReadString()
                         Case "GBSeq_organism"
                              organismName = nodeReader.ReadString()
                         Case "GBSeq_locus"
                              locusName = nodeReader.ReadString()
                         Case "GBSeq_update-date"
                              pubYear = nodeReader.ReadString()
                         Case "GBSeq_sequence"
                              ' The sequence is the last value wanted for a record: add the row.
                              ' Column order: Organism (0), Accession number version (1),
                              ' Accession number / locus (2), Sequence (3), Year / update date (4)
                              dgvListOfResults.Rows.Add(organismName, accessionVersion, locusName, nodeReader.ReadString(), pubYear)
                              ' Reset the variables for the possible next record found
                              accessionVersion = ""
                              locusName = ""
                              pubYear = ""
                              organismName = ""
                    End Select
               End While
          End Using
     End Sub

     ''' <summary>
     ''' Runs the search when the Search button is clicked and reports download or
     ''' parsing failures to the user instead of letting them escape the event handler.
     ''' </summary>
     Sub BtnSearchClick(sender As Object, e As EventArgs)
          Dim previousCursor As System.Windows.Forms.Cursor = Me.Cursor
          ' The request is synchronous, so show a wait cursor while it runs
          Me.Cursor = System.Windows.Forms.Cursors.WaitCursor
          Try
               GenbankQuery()
          Catch ex As System.Net.WebException
               MessageBox.Show(Me, "Could not retrieve the record: " & ex.Message, Me.Text, MessageBoxButtons.OK, MessageBoxIcon.Error)
          Catch ex As XmlException
               MessageBox.Show(Me, "The response could not be read as XML: " & ex.Message, Me.Text, MessageBoxButtons.OK, MessageBoxIcon.Error)
          Finally
               Me.Cursor = previousCursor
          End Try
     End Sub

     ''' <summary>
     ''' Copies the EFetch URL of the double-clicked result (built from its accession
     ''' version, column index 1) to the clipboard, tells the user and closes the form.
     ''' </summary>
     Sub DgvListOfResultsDoubleClick(sender As Object, e As DataGridViewCellEventArgs)
          ' RowIndex is -1 when a column header is double-clicked: there is no record to copy
          If e.RowIndex < 0 Then
               Return
          End If

          Dim accessionVersion As String = Convert.ToString(dgvListOfResults.Rows(e.RowIndex).Cells(1).Value)
          If String.IsNullOrEmpty(accessionVersion) Then
               ' No accession stored for this row, so no meaningful URL can be built
               Return
          End If

          ' Put the URL of the selected record in the clipboard (SetText replaces any previous content)
          Clipboard.SetText(EFetchBaseUrl & Uri.EscapeDataString(accessionVersion))
          ' Send a message to the end-user that everything is done ;)
          Microsoft.VisualBasic.MsgBox("URL is now in your clipboard")
          Me.Close()
     End Sub
End Class