Commit 79266248 by Paktalin

created separate columns for double forms

parent e4c2aa75
import pandas as pd
from util import get_postimees_urls, get_verbs_gf, get_text
from util import get_postimees_urls, get_text
from preprocessing import get_verbs_gf
import progressbar
import numpy as np
from tqdm import tqdm
# Script flow: load the verb table, download the text of every Postimees
# article, then look for the verb forms of one table row in the first article.
print("getting verbs...")
verbs = get_verbs_gf()
print(verbs)

# retrieve links to postimees articles
print("getting postimees urls...")
postimees_urls = get_postimees_urls()

print("extracting text from the urls...")
# tqdm wraps the iterable only to display a progress bar while fetching.
articles = [get_text(url) for url in tqdm(postimees_urls)]

# try to find a verb in an article
for column in verbs:
    # Row 2 of the verb table is used as the sample verb.
    verb_form = verbs.iloc[2][column]
    # Cells without a string value (e.g. missing forms) are skipped.
    if isinstance(verb_form, str):
        print(verb_form)
        # str.find prints the match offset, or -1 when the form is absent.
        print(articles[0].find(verb_form))
import pandas as pd
import numpy as np
def get_verbs_gf():
    """Return the verb table with double forms split into separate columns.

    The table is loaded with ``read_csv()`` and then passed through
    ``split_double()``.

    Returns:
        The DataFrame returned by ``split_double()``.
    """
    # read file as dataframe
    df = read_csv()
    df = split_double(df)
    return df
def read_csv():
    """Load ``verbs_gf.csv`` from the current working directory.

    The file is read without a header row, so columns are labelled with
    integers. Fields are separated by a comma followed by a space; a
    multi-character separator requires the python parsing engine.

    Returns:
        pandas.DataFrame: the raw table as read from the file.
    """
    df = pd.read_csv("verbs_gf.csv", sep=", ", encoding='utf8', header=None, engine='python')
    return df
def double(column):
    """Split a column of ``first|second`` strings into its two forms.

    Args:
        column: a pandas Series of strings; values may contain ``|``.

    Returns:
        A ``(first, second)`` tuple. ``first`` is the part before the first
        ``|``. ``second`` is the part after it, or a list of ``None`` (one
        entry per row) when no value in the column contains ``|``.
    """
    parts = column.str.split('|', expand=True)

    # Fall back to the untouched input if the split produced no column 0.
    first = parts[0] if 0 in parts.columns else column

    if 1 in parts.columns:
        second = parts[1]
    else:
        # No value had a second form: emit an all-None placeholder column.
        second = [None] * len(parts)
    return first, second
def split_double(df):
    """Add a ``<name>double`` column next to every existing column of ``df``.

    Each original column is replaced by the first value returned by
    ``double()``; the second value is stored under the original column name
    with ``"double"`` appended. ``df`` is modified in place and returned.
    """
    for name in df.columns:
        first_form, second_form = double(df[name])
        df[name] = first_form
        df[f"{name}double"] = second_form
    return df
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment